# Lyrics classifier

## Load packages

In [None]:
// Bring the Paket helper script into the session so `Paket.Package` is available.
#load "Paket.fsx"

// Request the packages this notebook uses: plotting (XPlot.Plotly), numerics
// (MathNet.Numerics and its F# extensions), CSV parsing (FSharp.Data) and Microsoft.ML.
// NOTE(review): presumably this downloads/resolves the packages through Paket — confirm
// against the notebook kernel's Paket.fsx.
Paket.Package 
  ["XPlot.Plotly"
   "MathNet.Numerics"
   "MathNet.Numerics.FSharp"
   "FSharp.Data"
   "Microsoft.ML"]
   
// Load the XPlot helper scripts and then the reference script.
// NOTE(review): "Paket.Generated.Refs.fsx" looks like it is produced by the Paket.Package
// call above, so these #load lines presumably must stay after it — confirm.
#load "XPlot.Plotly.Paket.fsx"
#load "XPlot.Plotly.fsx"
#load "Paket.Generated.Refs.fsx"

## Namespace imports and type definition

In [None]:
open System
open System.Linq
open System.IO
open MathNet.Numerics
open MathNet.Numerics.Distributions
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.Random
open FSharp.Data



/// One song record as used throughout this notebook.
/// `CLIMutable` makes the compiler emit a parameterless constructor and property
/// setters while the type still behaves as an immutable record in F# code.
[<CLIMutable>]
type LyricsInput =
    { /// Taken from the CSV "song" column.
      Song : string
      /// Taken from the CSV "artist" column.
      Artist : string
      /// Taken from the CSV "genre" column; used to group and filter songs.
      Genre : string
      /// Taken from the CSV "lyrics" column.
      Lyrics : string
      /// Taken from the CSV "year" column, converted to an integer.
      Year : int }
    

## Data cleanup

In [None]:
    // Location of the raw lyrics dataset, relative to the current working directory.
    let trainDataPath = Path.Combine("./","Data","lyrics.csv")
    // File.OpenRead asks for read-only access and lets other readers share the file;
    // File.Open(path, FileMode.Open) would request read/write access with an exclusive lock.
    // NOTE(review): the stream is never disposed here; it appears to have to stay open
    // while `msft.Rows` is being read — confirm and dispose `msft` once the data is loaded.
    let msft = CsvFile.Load(File.OpenRead(trainDataPath), separators = ",", quote = '"', hasHeaders = true)

    /// The first (at most) 50,000 rows that have real lyrics, converted to `LyricsInput`.
    /// Rows with empty lyrics or the "[Instrumental]" placeholder are skipped.
    /// Raises a format exception if a kept row has a non-numeric "year" value.
    let songLyrics =
        let maxSongs = 50000
        let hasUsableLyrics (row : CsvRow) =
            let lyrics = row.GetColumn "lyrics"
            not (String.IsNullOrEmpty lyrics)
            && not (String.Equals(lyrics, "[Instrumental]", StringComparison.OrdinalIgnoreCase))
        let toLyricsInput (row : CsvRow) =
            { Song = row.GetColumn "song"
              Artist = row.GetColumn "artist"
              Genre = row.GetColumn "genre"
              // Flatten multi-line lyrics. Both line-ending styles are handled explicitly
              // because Environment.NewLine reflects the host OS, not the file's contents.
              Lyrics = (row.GetColumn "lyrics").Replace("\r\n", ", ").Replace("\n", ", ")
              Year = row.GetColumn "year" |> int }
        msft.Rows
        |> Seq.filter hasUsableLyrics
        // Seq.truncate (unlike Seq.take) does not throw when fewer rows are available.
        |> Seq.truncate maxSongs
        |> Seq.map toLyricsInput
        // This sequence is enumerated by several later cells; cache it so the CSV is
        // parsed once instead of on every enumeration.
        |> Seq.cache
                              


### Visualize cleaned data

In [None]:
// Dump every cleaned record to the console, one per line.
for song in songLyrics do
    Console.WriteLine(song)
       

## Chart analysis

In [None]:
open XPlot.Plotly

// Pie chart of how many songs fall into each genre.
songLyrics
|> Seq.countBy (fun song -> song.Genre)
|> List.ofSeq
|> Chart.Pie
|> Chart.WithTitle "Dataset by Genre"
|> Chart.WithLegend true

In [None]:
open XPlot.Plotly

// Pie chart of how many songs belong to each year.
songLyrics
|> Seq.countBy (fun song -> song.Year)
|> List.ofSeq
|> Chart.Pie
|> Chart.WithTitle "Dataset by Year"
|> Chart.WithLegend true

In [None]:
open System.Text.RegularExpressions

// Tokens that the word-frequency analysis treats as noise (see isNotStopWord).
// Besides common English function words, the list contains contraction fragments
// ("'s", "n't", "'ll", ...), punctuation tokens ("...", "--", "''", "``") and a few
// Spanish words ("el", "la", "que", "y").
// NOTE(review): "ä±" looks like a mis-encoded character — confirm it matches what the data contains.
let stopwords = [|"ourselves"; "hers"; "between"; "yourself"; "but"; "again"; "there"; "about"; "once"; "during"; "out"; "very"; "having"; "with"; "they"; "own"; "an"; "be"; "some"; "for"; "do"; "its"; "yours"; "such"; "into"; "of"; "most"; "itself"; "other"; "off"; "is"; "s"; "am"; "or"; "who"; "as"; "from"; "him"; "each"; "the"; "themselves"; "until"; "below"; "are"; "we"; "these"; "your"; "his"; "through"; "don"; "nor"; "me"; "were"; "her"; "more"; "himself"; "this"; "down"; "should"; "our"; "their"; "while"; "above"; "both"; "up"; "to"; "ours"; "had"; "she"; "all"; "no"; "when"; "at"; "any"; "before"; "them"; "same"; "and"; "been"; "have"; "in"; "will"; "on"; "does"; "yourselves"; "then"; "that"; "because"; "what"; "over"; "why"; "so"; "can"; "did"; "not"; "now"; "under"; "he"; "you"; "herself"; "has"; "just"; "where"; "too"; "only"; "myself"; "which"; "those"; "i"; "after"; "few"; "whom"; "t"; "being"; "if"; "theirs"; "my"; "against"; "a"; "by"; "doing"; "it"; "how"; "further"; "was"; "here"; "than"; "'s"; "n't"; "'m"; "'re"; "'ll";"'ve";"..."; "ä±"; "''"; "``"; "--"; "'d"; "el"; "la"; "que"; "y"|]
// Punctuation characters that cleanWords removes from every word.
let symbols = ",.)(-:;?!"

/// Just the lyrics text of every cleaned song, in dataset order.
let lyricsList =
    seq { for song in songLyrics -> song.Lyrics }

/// Returns `text` with every occurrence of each character in `chars` removed.
let stripChars text (chars:string) =
    let mutable remaining : string = text
    for unwanted in chars do
        // Replacing a one-character string with "" deletes that character everywhere.
        remaining <- remaining.Replace(string unwanted, "")
    remaining

/// The stop words as a set, so membership is an exact, whole-word lookup.
let stopwordLookup = Set.ofArray stopwords

/// True when `word` is not one of the stop words.
/// The comparison is exact and case-sensitive. A substring test would be wrong here:
/// with single-letter stop words such as "a", "i", "s", "t" and "y" in the list, it
/// would reject almost every word (e.g. "heart" because it contains "a").
let isNotStopWord (word:string) =
    not (Set.contains word stopwordLookup)

/// Removes the punctuation characters listed in `symbols` from `word`.
let cleanWords (word:string) = stripChars word symbols

/// Splits `text` on whitespace, strips punctuation from each token and drops stop words.
/// Tokens that consist only of punctuation come back as empty strings.
let getWords(text: string) =
    Regex.Split(text, @"\s+")
    // Clean before filtering so that tokens like "you," are recognised as "you".
    |> Seq.map cleanWords
    |> Seq.filter isNotStopWord
    |> Seq.toList
            
/// Draws a line chart of the most frequent words in `words` (at most 15),
/// ordered from most to least frequent.
let renderLineChartForWords(words: seq<string>) =
    words
    |> Seq.countBy id
    |> Seq.sortByDescending snd
    // Seq.truncate, unlike Seq.take, does not throw when there are fewer than
    // 15 distinct words (for example a genre with few or no songs).
    |> Seq.truncate 15
    |> Chart.Line
                
/// Lower-cases each lyric, splits it into words with `getWords`, and returns all
/// non-empty words of all lyrics as one lazy sequence.
let getLyricsWords (lyrics : seq<string>) =
    lyrics
    |> Seq.collect (fun lyric -> getWords (lyric.ToLowerInvariant()))
    |> Seq.filter (fun word -> not (String.IsNullOrEmpty word))
                        
/// All words (see `getLyricsWords`) from the songs whose genre equals `genre` exactly.
let filterWordsByGenre (genre: string) =
    let lyricsOfGenre =
        seq {
            for song in songLyrics do
                if song.Genre = genre then
                    yield song.Lyrics
        }
    getLyricsWords lyricsOfGenre
                
/// Words from the lyrics of every song in the dataset, regardless of genre.
let filteredWords = lyricsList |> getLyricsWords


In [None]:
// Most frequent words across the whole dataset.
renderLineChartForWords filteredWords

In [None]:
// Most frequent words in songs whose genre is "Hip-Hop".
"Hip-Hop" |> filterWordsByGenre |> renderLineChartForWords

In [None]:
// Most frequent words in songs whose genre is "Pop".
"Pop" |> filterWordsByGenre |> renderLineChartForWords

In [None]:
// Most frequent words in songs whose genre is "Jazz".
"Jazz" |> filterWordsByGenre |> renderLineChartForWords