In [47]:
import pandas as pd
# Allow up to 1000 characters per printed line so frames are not wrapped.
pd.set_option("display.width", 1000)

# Load the music dataset and preview its first rows.
df = pd.read_csv('music.csv')
print(df.head())

        artist country  plays  genre  fans
0  The Beatles      UK    150   rock    50
1   Pink Floyd      UK  10000   rock  1500
2    Metallica      US    500  metal    50
3     Cairokee   Egypt    200   rock    10
4         ACDC      US    250   rock    20


In [48]:
# Total number of plays for each country, in dict format.
# "plays" is selected BEFORE aggregating so only that column is summed.
# Calling .sum() on the whole grouped frame aggregates every column first
# (on pandas >= 2.0 that includes the string columns) and then throws all
# but one of the results away.
country_stat = df.groupby('country')["plays"].sum()
print(type(country_stat))
print(country_stat)
# convert to dict: {country: total plays}
print(country_stat.to_dict())

<class 'pandas.core.series.Series'>
country
Egypt        200
Finland      250
UK         35150
US          2050
Name: plays, dtype: int64
{'Egypt': 200, 'Finland': 250, 'UK': 35150, 'US': 2050}


In [49]:
# Total number of plays for each (country, genre) pair, in dict format.
# "plays" is selected BEFORE aggregating so only that column is summed,
# instead of aggregating every remaining column and discarding the rest.
country_stat = df.groupby(['country', 'genre'])["plays"].sum()
print(type(country_stat))
print(country_stat)
# convert to dict: the two-level index becomes (country, genre) tuple keys
print(country_stat.to_dict())

<class 'pandas.core.series.Series'>
country  genre
Egypt    rock       200
Finland  rock       250
UK       metal    25000
         rock     10150
US       metal      800
         rock      1250
Name: plays, dtype: int64
{('Egypt', 'rock'): 200, ('Finland', 'rock'): 250, ('UK', 'metal'): 25000, ('UK', 'rock'): 10150, ('US', 'metal'): 800, ('US', 'rock'): 1250}


In [50]:
# Challenge: filter the grouped music dataset to find the countries whose
# artists' plays add up to more than 1000.
MIN_TOTAL_PLAYS = 1000  # threshold from the challenge statement (strictly greater than)

# Select "plays" before summing so only that column is aggregated.
country_stat = df.groupby('country')["plays"].sum()
# country_stat is a Series here, so the boolean mask is built from the Series
# itself; there is no "plays" column to index into.
country_stat = country_stat[country_stat > MIN_TOTAL_PLAYS]
print(type(country_stat))
print(country_stat)

print("use reset index")
# reset_index() turns the group key (country) into a regular column and
# replaces the index with the default 0, 1, 2, ...
country_stat = country_stat.reset_index()
print(type(country_stat))
print(country_stat)
# convert the country column to a list
print(country_stat["country"].tolist())

<class 'pandas.core.series.Series'>
country
UK    35150
US     2050
Name: plays, dtype: int64
use reset index
<class 'pandas.core.frame.DataFrame'>
  country  plays
0      UK  35150
1      US   2050
['UK', 'US']


In [51]:
# Your music analyst would like to know the ratio of plays/fans (plays per fan)
# to see how dedicated listeners are to artists.
print(df.head())

# Several columns must be selected from a GroupBy with a LIST (double brackets).
# The tuple form groupby(...)["plays", "fans"] was deprecated in pandas 1.0 and
# raises ValueError on pandas >= 2.0.
country_stat = df.groupby('country')[["plays", "fans"]].sum()
print(type(country_stat))
print(country_stat.head(2))
print("reset index")
country_stat = country_stat.reset_index()
print(country_stat.head(2))
# NOTE: a country whose fans add up to 0 would produce inf/NaN in this division.
country_stat["ratio"] = country_stat["plays"] / country_stat["fans"]
print(country_stat.head(2))
# convert to dict where country is key and ratio is value
print(country_stat.set_index('country')['ratio'].to_dict())

# Or use a lambda function to get the same dict in a single expression.
# The needed columns are selected explicitly so apply() does not operate on the
# grouping column (which triggers a DeprecationWarning on pandas 2.2).
df.groupby('country')[["plays", "fans"]].apply(lambda g: g["plays"].sum() / g["fans"].sum()).to_dict()

        artist country  plays  genre  fans
0  The Beatles      UK    150   rock    50
1   Pink Floyd      UK  10000   rock  1500
2    Metallica      US    500  metal    50
3     Cairokee   Egypt    200   rock    10
4         ACDC      US    250   rock    20
<class 'pandas.core.frame.DataFrame'>
         plays  fans
country             
Egypt      200    10
Finland    250    10
reset index
   country  plays  fans
0    Egypt    200    10
1  Finland    250    10
   country  plays  fans  ratio
0    Egypt    200    10   20.0
1  Finland    250    10   25.0
{'Egypt': 20.0, 'Finland': 25.0, 'UK': 5.809917355371901, 'US': 12.058823529411764}


{'Egypt': 20.0,
 'Finland': 25.0,
 'UK': 5.809917355371901,
 'US': 12.058823529411764}

In [61]:
# Reload the dataset and print a quick overview: preview, shape, and the
# number of distinct values per column.
df = pd.read_csv('music.csv')
print(df.head())
print(df.shape)
print(df.nunique())

# Way 1: named aggregation. Each keyword becomes an output column built from
# the given source column and aggregation function.
country = df.groupby('country').agg(
    plays_sum=pd.NamedAgg(column='plays', aggfunc='sum'),
    plays_mean=pd.NamedAgg(column='plays', aggfunc='mean'),
    fans_max=pd.NamedAgg(column='fans', aggfunc='max'),
)
print(country.head(2))
# One nested dict per country: {country: {output column: value}}
print(country.to_dict(orient='index'))

# Way 2 (availability depends on the pandas version): dict-based aggregation,
# which yields two-level (source column, function) column labels.
country = df.groupby('country').agg({'plays': ['sum', 'mean'], 'fans': 'max'})

# Collapse each two-level label into a single "<column>_<function>" name.
country.columns = [f"{source}_{func}".strip() for source, func in country.columns]
print(country.head(2))

        artist country  plays  genre  fans
0  The Beatles      UK    150   rock    50
1   Pink Floyd      UK  10000   rock  1500
2    Metallica      US    500  metal    50
3     Cairokee   Egypt    200   rock    10
4         ACDC      US    250   rock    20
(10, 5)
artist     10
country     4
plays       9
genre       2
fans        7
dtype: int64
         plays_sum  plays_mean  fans_max
country                                 
Egypt          200       200.0        10
Finland        250       250.0        10
{'Egypt': {'plays_sum': 200, 'plays_mean': 200.0, 'fans_max': 10}, 'Finland': {'plays_sum': 250, 'plays_mean': 250.0, 'fans_max': 10}, 'UK': {'plays_sum': 35150, 'plays_mean': 8787.5, 'fans_max': 3500}, 'US': {'plays_sum': 2050, 'plays_mean': 512.5, 'fans_max': 80}}
         plays_sum  plays_mean  fans_max
country                                 
Egypt          200       200.0        10
Finland        250       250.0        10
