In [1]:
import io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context='talk', style='ticks')
%matplotlib inline


# Import the file and name the column

In [2]:
df = pd.DataFrame(io.open('lyrics_scraped.txt', 'r', encoding='ascii', errors='ignore').read().split('\n\n'))
df.columns = ['Lyrics']

In [3]:
pd.set_option('display.max_rows', None)

In [4]:
df

Unnamed: 0,Lyrics
0,"###""From The Ground""###"
1,[Danny Brown:]\n8 balls wit a safety pin\nI de...
2,"[Kelela:]\nYou turn around, and now I'm winnin..."
3,"[Danny Brown:]\nNow tell me, would you sacrifi..."
4,"[Kelela:]\nYou turn around, and now I'm winnin..."
5,"###""Drinks On Me""###"
6,"###""Bag Back""###"
7,I lived my whole life for a green piece of pap...
8,And I dog these hoes like a Georgetown Hoya\nW...
9,I'm in the hood all day\nWhere the fuck are yo...


# Delete lines that don't have lyrics, or they are lyrics from other artists

In [7]:
new_df = df[df['Lyrics'].str.contains(']')]
new_df

Unnamed: 0,Lyrics
1,[Danny Brown:]\n8 balls wit a safety pin\nI de...
2,"[Kelela:]\nYou turn around, and now I'm winnin..."
3,"[Danny Brown:]\nNow tell me, would you sacrifi..."
4,"[Kelela:]\nYou turn around, and now I'm winnin..."
12,Trail of blood on that baggie\nI done scraped ...
13,[Hook]\nLike open wide ho!\nOpen wide ho!\nOpe...
14,[ScHoolboy Q]\nIf I tell a bitch to do a flip ...
15,[Hook]
17,[Verse 1:]\nRemember when my first meal was sc...
18,[Hook:]\nRushing at a kid just to be grown up\...


In [8]:
df_brown=new_df[df['Lyrics'].str.contains('Brown')]
df_brown

  """Entry point for launching an IPython kernel.


Unnamed: 0,Lyrics
1,[Danny Brown:]\n8 balls wit a safety pin\nI de...
3,"[Danny Brown:]\nNow tell me, would you sacrifi..."
40,[Verse 1: Danny Brown]\nI was thinkin' 'bout s...
42,[Verse 2: Danny Brown]\nAnd I kept lickin' on ...
44,[Verse 3: Scrufizzer]\nIt's the same old shit\...
86,[Verse 1: Danny Brown]\nWe been lookin at the ...
88,[Verse 2: Danny Brown]\nEarly morning next day...
107,[Verse 1: Danny Brown]\nFeeling like I got mon...
109,[Verse 2: Danny Brown]\nFeeling like I'm not a...
133,"[Verse 1:]\nI'm the first to breakfast, late f..."


In [9]:
df_verse = new_df[df['Lyrics'].str.contains('\[Verse')]
df_verse

  """Entry point for launching an IPython kernel.


Unnamed: 0,Lyrics
17,[Verse 1:]\nRemember when my first meal was sc...
19,[Verse 2:]\nI can eat a pound and shit sixty f...
40,[Verse 1: Danny Brown]\nI was thinkin' 'bout s...
42,[Verse 2: Danny Brown]\nAnd I kept lickin' on ...
44,[Verse 3: Scrufizzer]\nIt's the same old shit\...
47,"[Verse 1:]\nRemember one time, dawg\nThis fien..."
49,[Verse 2:]\nI feel like a prisoner of war\nRea...
52,[Verse 1]\nI was born one day before Saint Pat...
54,"[Verse 2]\nAround first grade, we move to the ..."
66,[Verse 1:]\nDrop a deuce in that soda\nTell yo...


In [10]:
df_hook = new_df[df['Lyrics'].str.contains('\[Hook')]
df_hook

  """Entry point for launching an IPython kernel.


Unnamed: 0,Lyrics
13,[Hook]\nLike open wide ho!\nOpen wide ho!\nOpe...
15,[Hook]
18,[Hook:]\nRushing at a kid just to be grown up\...
20,[Hook]
41,[Hook:]\nI had them dubs on the step\nI had th...
43,[Hook:]\nI had them dubs on the step\nI had th...
48,[Hook:]\nAnd it's torture\nLook in my mind and...
50,[Hook]
53,[Hook]\nYou're from the East Side\nYou're from...
55,[Hook]


In the above lines I created variables that have parts of the data cleaned. I removed the '[Hook]' lines, I removed the ## ## Lines, I removed lines that are not the verses of my selected artist. With the below code I merge these dataframes into one

In [11]:
df_clean = pd.concat([df_brown,df_verse,df_hook], ignore_index = True)
df_clean

Unnamed: 0,Lyrics
0,[Danny Brown:]\n8 balls wit a safety pin\nI de...
1,"[Danny Brown:]\nNow tell me, would you sacrifi..."
2,[Verse 1: Danny Brown]\nI was thinkin' 'bout s...
3,[Verse 2: Danny Brown]\nAnd I kept lickin' on ...
4,[Verse 3: Scrufizzer]\nIt's the same old shit\...
5,[Verse 1: Danny Brown]\nWe been lookin at the ...
6,[Verse 2: Danny Brown]\nEarly morning next day...
7,[Verse 1: Danny Brown]\nFeeling like I got mon...
8,[Verse 2: Danny Brown]\nFeeling like I'm not a...
9,"[Verse 1:]\nI'm the first to breakfast, late f..."


# Removing linebreaks and exporting to CSV

The below code removes \n linebreaks, import df_stripped back to a pd dataframe, as it became a list of strings, then removes the brackets from the beginning and exports that datafram to a csv file

In [33]:
df_stripped = df_clean['Lyrics'].str.split('\\n').str[1:]
df_stripped

0     [8 balls wit a safety pin, I deserve the finer...
1     [Now tell me, would you sacrifice your dreams?...
2     [I was thinkin' 'bout somethin', But I ain't w...
3     [And I kept lickin' on that clit, Till she gav...
4     [It's the same old shit, Everybody in a manner...
5     [We been lookin at the house for a bout a whol...
6     [Early morning next day at the junkyard, Got f...
7     [Feeling like I got money, No, I'm just nigga ...
8     [Feeling like I'm not alive, But I know I'm no...
9     [I'm the first to breakfast, late for dinner, ...
10    [I'm the first to breakfast late for dinner, F...
11    [Kush got a nigga feeling awesome, Ate that bi...
12    [See they think I'm a fuck nigga, But if they ...
13    [The return of the gangsta cause niggas don't ...
14    [Remember when my first meal was school lunch,...
15    [I can eat a pound and shit sixty four quarter...
16    [I was thinkin' 'bout somethin', But I ain't w...
17    [And I kept lickin' on that clit, Till she

In [34]:
df_stripped = pd.DataFrame(df_stripped)
df_stripped

Unnamed: 0,Lyrics
0,"[8 balls wit a safety pin, I deserve the finer..."
1,"[Now tell me, would you sacrifice your dreams?..."
2,"[I was thinkin' 'bout somethin', But I ain't w..."
3,"[And I kept lickin' on that clit, Till she gav..."
4,"[It's the same old shit, Everybody in a manner..."
5,[We been lookin at the house for a bout a whol...
6,"[Early morning next day at the junkyard, Got f..."
7,"[Feeling like I got money, No, I'm just nigga ..."
8,"[Feeling like I'm not alive, But I know I'm no..."
9,"[I'm the first to breakfast, late for dinner, ..."


In [36]:
df_stripped = df_stripped['Lyrics'].replace('[','')
df_stripped

0     [8 balls wit a safety pin, I deserve the finer...
1     [Now tell me, would you sacrifice your dreams?...
2     [I was thinkin' 'bout somethin', But I ain't w...
3     [And I kept lickin' on that clit, Till she gav...
4     [It's the same old shit, Everybody in a manner...
5     [We been lookin at the house for a bout a whol...
6     [Early morning next day at the junkyard, Got f...
7     [Feeling like I got money, No, I'm just nigga ...
8     [Feeling like I'm not alive, But I know I'm no...
9     [I'm the first to breakfast, late for dinner, ...
10    [I'm the first to breakfast late for dinner, F...
11    [Kush got a nigga feeling awesome, Ate that bi...
12    [See they think I'm a fuck nigga, But if they ...
13    [The return of the gangsta cause niggas don't ...
14    [Remember when my first meal was school lunch,...
15    [I can eat a pound and shit sixty four quarter...
16    [I was thinkin' 'bout somethin', But I ain't w...
17    [And I kept lickin' on that clit, Till she

In [40]:
df_brandnew = pd.DataFrame(df_stripped)
df_brandnew['Lyrics'] = df_brandnew['Lyrics'].str.join(',')
df_brandnew.head()

Unnamed: 0,Lyrics
0,"8 balls wit a safety pin,I deserve the finer t..."
1,"Now tell me, would you sacrifice your dreams?,..."
2,"I was thinkin' 'bout somethin',But I ain't wor..."
3,"And I kept lickin' on that clit,Till she gave ..."
4,"It's the same old shit,Everybody in a manner t..."


In [39]:
df_brandnew.to_csv(r'PATH TO THE FOLDER OF YOUR CHOICE\FILE NAME.csv')