In [1]:
import pandas as pd
import numpy as np
import psycopg2
import re
from sqlalchemy import create_engine
from config import db_pwd

In [2]:
# Connect to database (Note: The package psychopg2 is required for Postgres to work with SQLAlchemy)
db_string = f"postgres://postgres:{db_pwd}@127.0.0.1:5432/Platinum_Lyrics"

engine = create_engine(db_string)

## platinum lyrics

In [3]:
platinum_lyrics = pd.read_sql_table("platinum_lyrics",engine)
platinum_lyrics.head()

Unnamed: 0,track_id,artist_name,song_title,song_year,target_success,target_weeks,target_peak,word_abov,word_accept,word_ach,...,word_yeah,word_year,word_yellow,word_yes,word_yesterday,word_yet,word_york,word_young,word_yourself,word_youth
0,TRAAAAV128F421A322,western addiction,a poor recipe for civic cohesion,2005,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,TRAAABD128F429CF47,the box tops,soul deep,1969,1,18,14,0,0,0,...,4,0,0,0,0,0,0,0,0,0
2,TRAAAGF12903CEC202,halvdan sivertsen,smã¥ ord,2005,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,TRAAAZF12903CCCF6B,matthew wilder,break my stride,1984,1,29,95,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,TRAABEV12903CC53A4,suicide commando,blood in face,2000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
platinum_lyrics.shape

(39094, 1556)

In [5]:
column_names = platinum_lyrics.columns.values
column_names

array(['track_id', 'artist_name', 'song_title', ..., 'word_young',
       'word_yourself', 'word_youth'], dtype=object)

In [6]:
# new column names (removing "_word")
new_column_names = {}
for column in column_names:
    new_column_names[column] = column.replace('_word', '')

new_column_names    

{'track_id': 'track_id',
 'artist_name': 'artist_name',
 'song_title': 'song_title',
 'song_year': 'song_year',
 'target_success': 'target_success',
 'target_weeks': 'target_weeks',
 'target_peak': 'target_peak',
 'word_abov': 'word_abov',
 'word_accept': 'word_accept',
 'word_ach': 'word_ach',
 'word_across': 'word_across',
 'word_act': 'word_act',
 'word_action': 'word_action',
 'word_addict': 'word_addict',
 'word_admit': 'word_admit',
 'word_ador': 'word_ador',
 'word_afraid': 'word_afraid',
 'word_against': 'word_against',
 'word_age': 'word_age',
 'word_ago': 'word_ago',
 'word_ahead': 'word_ahead',
 'word_aim': 'word_aim',
 'word_air': 'word_air',
 'word_album': 'word_album',
 'word_algo': 'word_algo',
 'word_aliv': 'word_aliv',
 'word_alla': 'word_alla',
 'word_alma': 'word_alma',
 'word_almost': 'word_almost',
 'word_alon': 'word_alon',
 'word_along': 'word_along',
 'word_alreadi': 'word_alreadi',
 'word_alright': 'word_alright',
 'word_although': 'word_although',
 'word_alway

In [7]:
platinum_lyrics = platinum_lyrics.rename(columns=new_column_names)
platinum_lyrics.head()

Unnamed: 0,track_id,artist_name,song_title,song_year,target_success,target_weeks,target_peak,word_abov,word_accept,word_ach,...,word_yeah,word_year,word_yellow,word_yes,word_yesterday,word_yet,word_york,word_young,word_yourself,word_youth
0,TRAAAAV128F421A322,western addiction,a poor recipe for civic cohesion,2005,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,TRAAABD128F429CF47,the box tops,soul deep,1969,1,18,14,0,0,0,...,4,0,0,0,0,0,0,0,0,0
2,TRAAAGF12903CEC202,halvdan sivertsen,smã¥ ord,2005,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,TRAAAZF12903CCCF6B,matthew wilder,break my stride,1984,1,29,95,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,TRAABEV12903CC53A4,suicide commando,blood in face,2000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# export to csv
platinum_lyrics.to_csv("../Resources/platinum_lyrics.csv")

## platinum lyrics features

In [9]:
platinum_lyrics_features = pd.read_sql_table("platinum_lyrics_features",engine)
platinum_lyrics_features.head()

Unnamed: 0,track_id,artist_name,song_title,song_year,feature_genre,feature_popularity,feature_duration,feature_key,feature_acousticness,feature_instrumentalness,...,word_yeah,word_year,word_yellow,word_yes,word_yesterday,word_yet,word_york,word_young,word_yourself,word_youth
0,TRAAABD128F429CF47,the box tops,soul deep,1969,pop,38,147760,9,0.195,3.5e-05,...,4,0,0,0,0,0,0,0,0,0
1,TRAAAZF12903CCCF6B,matthew wilder,break my stride,1984,pop,78,184480,10,0.195,0.0,...,0,0,0,0,0,0,0,0,0,0
2,TRAADKW128E079503A,tracy chapman,fast car,1988,folk,78,296800,4,0.313,0.0,...,0,0,0,0,0,0,0,1,0,0
3,TRAAFEU128E078581C,r.e.m.,fall on me,1986,rock,48,169920,0,0.0711,0.000391,...,0,0,0,0,0,0,0,0,0,0
4,TRAAGMC128F4292D0F,little texas,my love,1994,country,44,245800,9,0.377,0.0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
platinum_lyrics_features.shape

(8692, 1571)

In [11]:
platinum_lyrics_features = platinum_lyrics_features.rename(columns=new_column_names)
platinum_lyrics_features.head()

Unnamed: 0,track_id,artist_name,song_title,song_year,feature_genre,feature_popularity,feature_duration,feature_key,feature_acousticness,feature_instrumentalness,...,word_yeah,word_year,word_yellow,word_yes,word_yesterday,word_yet,word_york,word_young,word_yourself,word_youth
0,TRAAABD128F429CF47,the box tops,soul deep,1969,pop,38,147760,9,0.195,3.5e-05,...,4,0,0,0,0,0,0,0,0,0
1,TRAAAZF12903CCCF6B,matthew wilder,break my stride,1984,pop,78,184480,10,0.195,0.0,...,0,0,0,0,0,0,0,0,0,0
2,TRAADKW128E079503A,tracy chapman,fast car,1988,folk,78,296800,4,0.313,0.0,...,0,0,0,0,0,0,0,1,0,0
3,TRAAFEU128E078581C,r.e.m.,fall on me,1986,rock,48,169920,0,0.0711,0.000391,...,0,0,0,0,0,0,0,0,0,0
4,TRAAGMC128F4292D0F,little texas,my love,1994,country,44,245800,9,0.377,0.0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# export to csv
platinum_lyrics_features.to_csv("../Resources/platinum_lyrics_features.csv")

## platinum features

In [13]:
platinum_features = pd.read_sql_table("platinum_features",engine)

platinum_features.head()

Unnamed: 0,track_id,artist_name,song_title,song_year,feature_genre,feature_popularity,feature_duration,feature_key,feature_acousticness,feature_instrumentalness,...,feature_danceability,feature_energy,feature_liveness,feature_loudness,feature_speechiness,feature_valence,feature_explicit,target_success,target_weeks,target_peak
0,TRRBOBU128F4293068,texas,i don't want a lover,1989,country,61,300600,7,0.196,0.000487,...,0.756,0.47,0.126,-12.615,0.0394,0.43,0,1,77,6
1,TRVCPQS128F4285928,the youngbloods,ride the wind,1988,rock,20,396600,2,0.91,0.651,...,0.558,0.307,0.0866,-20.492,0.0343,0.674,0,0,0,0
2,TRZRMWW128F426E797,babyface,tender lover,1990,pop,37,259267,5,0.226,0.000422,...,0.743,0.86,0.0513,-6.346,0.0445,0.687,0,1,17,89
3,TRVSRVI128F4261843,reo speedwagon,one lonely night,1984,country,31,201467,5,0.0561,0.000149,...,0.408,0.579,0.0712,-10.277,0.026,0.397,0,1,19,16
4,TRSHXOI128F146B1AE,john waite,change,1982,rock,49,196693,11,0.246,0.0,...,0.403,0.848,0.616,-11.615,0.0625,0.622,0,1,54,10


In [14]:
platinum_lyrics_features.shape

(8692, 1571)

In [15]:
# export to csv
platinum_features.to_csv("../Resources/platinum_features.csv")