In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets

## Notebook display settings
pd.options.display.max_columns = 999

sns.set(rc={'figure.figsize':(16,16)})

In [27]:
base_data = pd.read_csv('non-normalized_data.csv')

In [28]:
col_names = base_data.columns.values

In [29]:
print(col_names)

['album_number' 'song_name' 'acousticness' 'danceability' 'energy'
 'loudness' 'track_number' 'album_name' 'bars_count' 'beats_count'
 'sections_count' 'segments_count' 'tatums_count' 'song_length_seconds'
 'bpm' 'McCartney' 'Lennon' 'Harrison' 'Other']


In [5]:
base_data = base_data[['album_number', 'acousticness', 'danceability', 'energy', 
                       'bars_count', 'beats_count', 'sections_count', 'segments_count',
 'tatums_count', 'song_length_seconds', 'bpm', 'McCartney', 'Lennon', 'Harrison', 'Other']]

In [36]:
y = base_data

In [37]:
y.head()

Unnamed: 0,album_number,song_name,acousticness,danceability,energy,loudness,track_number,album_name,bars_count,beats_count,sections_count,segments_count,tatums_count,song_length_seconds,bpm,McCartney,Lennon,Harrison,Other
0,1,I Saw Her Standing There - Remastered,0.27,0.491,0.801,-9.835,1,Please Please Me (Remastered),7180,28360,1781,250045,56730,173.946,9782.346245,1.0,0.0,0.0,0.0
1,1,Misery - Remastered,0.707,0.591,0.605,-10.925,2,Please Please Me (Remastered),3621,14596,1015,126147,29210,108.546,8068.10016,1.0,1.0,0.0,0.0
2,1,Anna (Go To Him) - Remastered,0.635,0.608,0.565,-11.06,3,Please Please Me (Remastered),4910,19737,2807,190106,39471,177.133,6685.484918,0.0,0.0,0.0,1.0
3,1,Chains - Remastered,0.608,0.654,0.561,-10.551,4,Please Please Me (Remastered),4774,19303,1527,150395,38619,145.08,7983.043838,0.0,0.0,0.0,1.0
4,1,Boys - Remastered,0.607,0.402,0.86,-10.31,5,Please Please Me (Remastered),5340,21381,2299,189816,42772,146.44,8760.31139,0.0,0.0,0.0,1.0


In [9]:
y.info()

def log_norm(df, col):
    return(np.log(df[col]))

In [10]:
feature_col_names = ['bars_count',
                     'beats_count',
                     'sections_count',
                     'segments_count',
                     'tatums_count',
                     'song_length_seconds',
                     'bpm']

In [11]:
## Iterating through columns to normalize the data 

for i in feature_col_names:
    y[i] = log_norm(y, i)

In [30]:
log_features = pd.melt(y, id_vars = ['album_number', 'album_name', 'track_number', 'song_name'], var_name = 'feature', value_name = 'value')

In [34]:
log_features.head()

Unnamed: 0,album_number,album_name,track_number,song_name,feature,value
0,1,Please Please Me (Remastered),1,I Saw Her Standing There - Remastered,acousticness,0.27
1,1,Please Please Me (Remastered),2,Misery - Remastered,acousticness,0.707
2,1,Please Please Me (Remastered),3,Anna (Go To Him) - Remastered,acousticness,0.635
3,1,Please Please Me (Remastered),4,Chains - Remastered,acousticness,0.608
4,1,Please Please Me (Remastered),5,Boys - Remastered,acousticness,0.607


In [15]:
log_features['value'] = log_features['value'].astype(float)

In [16]:
def cat_plot(x):
    temp_df = log_features[log_features.feature == x]

    sns.lmplot(x="album_number", y="value", hue="feature", data = temp_df)

In [17]:
cat_drop = widgets.Dropdown(
    options=[
        'acousticness',
        'danceability',
        'energy',
        'bars_count',
        'beats_count',],
    description='Category:',
    disabled=False,
    readout=True,
)

In [18]:
widgets.interact(cat_plot, x= cat_drop);

In [31]:
def future_value(df, req_feat):
    ## Takes the requested feature and calculates the regression line, then plots the regression line
    requested_df = df[df.feature == req_feat]

    x = np.array(requested_df.album_number)
    y = np.array(requested_df.value)

    fit = np.polyfit(x, y, 1)
    fit_fn = np.poly1d(fit) 

    #feature_plot = sns.lmplot(x="album_number", y="value", hue="feature", data = requested_df)
    print(fit_fn(14))

In [38]:

unique_feat_list = log_features.feature.unique()
for i in unique_feat_list:
    future_value(log_features, i)
    print("^ - ", i)

0.3062481815012574
^ -  acousticness
0.46959219997513757
^ -  danceability
0.42664557767342637
^ -  energy
-11.716340633032713
^ -  loudness
6245.045919533548
^ -  bars_count
23753.920570154514
^ -  beats_count
2275.0607688945393
^ -  sections_count
215707.03705322588
^ -  segments_count
47507.6481647826
^ -  tatums_count
193.25088127127856
^ -  song_length_seconds
7251.292658193146
^ -  bpm
0.49523801885544017
^ -  McCartney
0.33967061125792514
^ -  Lennon
0.1935309238042916
^ -  Harrison
0.0322852296377687
^ -  Other
