In [1]:
import pandas as pd
import altair as alt
import pycountry
import pytz
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import requests
import json
import random
import seaborn as sns
pd.options.display.max_rows = 4000
%matplotlib inline

### Importing Profile Info

In [2]:
# Load the user profile table. Because `names` is supplied without `header`,
# pandas treats every row of the file (including any header row) as data.
profile = pd.read_csv(
    "/kaggle/input/lastfm/userid-profile.csv",
    names=['user_id', 'gender', 'age', 'country', 'registered'],
)

In [3]:
# Keep only users with a known country, renumber the rows from 0, and then
# discard the first remaining row.
# NOTE(review): presumably that first row is the file's header line read in
# as data — confirm. Re-running this cell drops one more row each time.
profile = (
    profile
    .dropna(subset=['country'])
    .reset_index(drop=True)
    .iloc[1:, :]
)

In [4]:
# Look up the pycountry `alpha_2` code for each profile's country name.
# Names that are not an exact pycountry `name` yield None.
mapping = {entry.name: entry.alpha_2 for entry in pycountry.countries}
country_code = [mapping.get(country_name) for country_name in profile['country']]

In [None]:
# Store the looked-up codes as a new `country_code` column on `profile`.
profile['country_code'] = country_code

In [None]:
# Preview the first 20 rows of `profile`.
profile.head(20)

In [None]:
# Dimensions of `profile` as (rows, columns).
profile.shape

### Importing Profile SHA Info

In [5]:
# Load the SHA-keyed user profile table. As `names` is given without `header`,
# pandas reads every row of the file as data.
profile_sha = pd.read_csv(
    "/kaggle/input/lastfm/usersha1-profile.csv",
    names=['user_sha', 'gender', 'age', 'country', 'registered'],
)

In [6]:
# Look up the pycountry `alpha_2` code for each SHA profile's country name.
# Names that are not an exact pycountry `name` (and missing values) yield None.
mapping = {entry.name: entry.alpha_2 for entry in pycountry.countries}
country_code = [mapping.get(country_name) for country_name in profile_sha['country']]

In [7]:
# Store the looked-up codes as a new `country_code` column on `profile_sha`.
profile_sha['country_code'] = country_code

In [8]:
# Preview the first 20 rows of `profile_sha`.
profile_sha.head(20)

### Importing Listening Activity

In [9]:
# Load the per-play listening log (tab-separated; column names supplied here).
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` keeps the same behaviour: malformed rows are skipped
# and reported instead of aborting the load.
activity = pd.read_csv(
    "/kaggle/input/lastfm/userid-timestamp-artid-artname-traid-traname.tsv",
    sep="\t",
    on_bad_lines='warn',
    names=['user_id', 'timestamp', 'artist_id', 'artist_name', 'track_id', 'track_name'],
)

In [10]:
# Dimensions of `activity` as (rows, columns).
activity.shape

### Importing Artist Plays

In [11]:
# Load per-user artist play counts (tab-separated, no header row).
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` keeps the same behaviour: malformed rows are skipped
# and reported instead of aborting the load.
artist_plays = pd.read_csv(
    "/kaggle/input/lastfm/usersha1-artmbid-artname-plays.tsv",
    sep="\t",
    on_bad_lines='warn',
    header=None,
    names=['user_sha', 'artist_id', 'artist_name', 'plays'],
)

In [12]:
# Dimensions of `artist_plays` as (rows, columns).
artist_plays.shape

In [13]:
# Attach each listener's profile to their artist play counts by joining on
# `user_sha`, then discard the columns the per-country totals do not need.
artist_profile_joined = (
    artist_plays
    .set_index('user_sha')
    .join(profile_sha.set_index('user_sha'))
    .drop(columns=['age', 'gender', 'registered', 'artist_id'])
)

In [14]:
# Move the first index level back into the frame as an ordinary column.
artist_profile_joined = artist_profile_joined.reset_index(level=0)

In [15]:
# Preview the first 20 rows of the joined plays/profile table.
artist_profile_joined.head(20)

In [16]:
# Drop every row that has a missing value in ANY column.
# NOTE(review): `country_code` is None wherever the country name had no
# pycountry match, so those rows are removed here as well — confirm that is
# intended, since the maps below locate countries by name, not by code.
artist_profile_joined = artist_profile_joined.dropna()

### Plotting the Number of Artist Streams per Country

In [17]:
# One row per (artist, country) pair, with `total` holding the summed plays of
# that pair. The other columns keep the values of the pair's first row.
group_keys = ['artist_name', 'country']
artist_plays_per_country = (
    artist_profile_joined
    .drop(columns='user_sha')
    .assign(total=lambda frame: frame.groupby(group_keys)['plays'].transform('sum'))
    .drop_duplicates(subset=group_keys)
)

In [18]:
# Renumber the rows from 0; the previous row labels are kept as a new column
# (the following cell removes a column called `index`).
artist_plays_per_country = artist_plays_per_country.reset_index()

In [19]:
# Remove the `index` column so only the data columns remain.
artist_plays_per_country = artist_plays_per_country.drop(columns='index')

In [20]:
import plotly.express as px

#### Betty Blowtorch

In [21]:
# Per-country totals for the artist 'betty blowtorch' only.
temp = artist_plays_per_country[artist_plays_per_country['artist_name'] == 'betty blowtorch']

In [22]:
# World map of the rows currently in `temp`, shaded by `total`; countries are
# located by the text in the `country` column.
fig = px.choropleth(
    temp,
    locations='country',
    locationmode='country names',
    color='total',
    hover_name='country',
)
# Optional layout tweak, left disabled:
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

#### Kanye West

In [23]:
# Per-country totals for the artist 'kanye west' only.
temp = artist_plays_per_country[artist_plays_per_country['artist_name'] == 'kanye west']

In [24]:
# World map of the rows currently in `temp`, shaded by `total`; countries are
# located by the text in the `country` column.
fig = px.choropleth(
    temp,
    locations='country',
    locationmode='country names',
    color='total',
    hover_name='country',
)
# Optional layout tweak, left disabled:
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

#### Coldplay

In [25]:
# Per-country totals for the artist 'coldplay' only.
temp = artist_plays_per_country[artist_plays_per_country['artist_name'] == 'coldplay']

In [26]:
# World map of the rows currently in `temp`, shaded by `total`; countries are
# located by the text in the `country` column.
fig = px.choropleth(
    temp,
    locations='country',
    locationmode='country names',
    color='total',
    hover_name='country',
)
# Optional layout tweak, left disabled:
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

### Creating a Listening Clock

In [27]:
# Attach each listener's profile columns to every play in the activity log,
# matching rows on `user_id`.
activity_profile_joined = (
    activity
    .set_index('user_id')
    .join(profile.set_index('user_id'))
)

In [28]:
# Preview the first 20 rows of the joined activity/profile table.
activity_profile_joined.head(20)

In [29]:
# Turn the `user_id` join index back into an ordinary column.
activity_profile_joined = activity_profile_joined.reset_index()

In [30]:
# Preview the table again now that the index has been reset.
activity_profile_joined.head()

In [31]:
# Parse the timestamp strings and record the hour of day of every play.
# NOTE(review): the hour is taken straight from the parsed timestamp; no
# conversion to the listener's local timezone happens here — confirm that is
# acceptable for a per-country listening clock.
parsed_timestamps = pd.to_datetime(activity_profile_joined['timestamp'])
activity_profile_joined['timestamp'] = parsed_timestamps
activity_profile_joined['hour'] = activity_profile_joined['timestamp'].dt.hour

In [32]:
# Number of logged plays per artist across the whole activity table.
# NOTE(review): `tempp` is not referenced by any later cell shown here.
tempp = (
    activity_profile_joined
    .groupby(['artist_name'])
    .size()
    .reset_index(name='Counts')
)

In [33]:
# Every logged play whose artist is 'Pink Floyd'.
temp = activity_profile_joined[activity_profile_joined['artist_name'] == 'Pink Floyd']

In [34]:
# Renumber the filtered rows from 0, discarding the old row labels.
temp = temp.reset_index(drop=True)

In [35]:
# Count plays per (country, hour), keep the United States rows in `temp2`,
# and leave an hour/Counts table renumbered from 0 in `temp3`.
counts_by_country_hour = temp.groupby(['country', 'hour']).size().reset_index(name='Counts')
temp2 = counts_by_country_hour[counts_by_country_hour['country'] == 'United States']
temp3 = temp2.drop(columns='country').reset_index(drop=True)

In [36]:
# Preview the first 20 rows of the hourly play counts.
temp3.head(20)

#### Kanye West

In [37]:
# Hourly play counts for Kanye West among United States listeners.
# The counts are computed here rather than read from the shared `temp3`
# scratch table, which holds whichever artist was filtered last and would
# otherwise be drawn under this title.
kanye_west_rows = activity_profile_joined[
    (activity_profile_joined['artist_name'] == 'Kanye West')
    & (activity_profile_joined['country'] == 'United States')
]
kanye_west_hourly = kanye_west_rows.groupby('hour').size().reset_index(name='Counts')

# Keep the Axes in `ax`; `set_title` returns a Text object, not the Axes.
ax = sns.barplot(x="hour", y="Counts", data=kanye_west_hourly, palette="coolwarm")
ax.set_title("Listening Hours for Kanye West")
plt.show()

#### Coldplay

In [38]:
# Hourly play counts for Coldplay among United States listeners.
# The counts are computed here rather than read from the shared `temp3`
# scratch table, which holds whichever artist was filtered last and would
# otherwise be drawn under this title.
coldplay_rows = activity_profile_joined[
    (activity_profile_joined['artist_name'] == 'Coldplay')
    & (activity_profile_joined['country'] == 'United States')
]
coldplay_hourly = coldplay_rows.groupby('hour').size().reset_index(name='Counts')

# Keep the Axes in `ax`; `set_title` returns a Text object, not the Axes.
ax = sns.barplot(x="hour", y="Counts", data=coldplay_hourly, palette="coolwarm")
ax.set_title("Listening Hours for Coldplay")
plt.show()

#### Pink Floyd

In [39]:
# Hourly play counts for Pink Floyd among United States listeners.
# The counts are computed here so the chart does not depend on the shared
# `temp3` scratch table still holding this artist's data.
pink_floyd_rows = activity_profile_joined[
    (activity_profile_joined['artist_name'] == 'Pink Floyd')
    & (activity_profile_joined['country'] == 'United States')
]
pink_floyd_hourly = pink_floyd_rows.groupby('hour').size().reset_index(name='Counts')

# Keep the Axes in `ax`; `set_title` returns a Text object, not the Axes.
ax = sns.barplot(x="hour", y="Counts", data=pink_floyd_hourly, palette="coolwarm")
ax.set_title("Listening Hours for Pink Floyd")
plt.show()