In [1]:
import pandas as pd
import numpy as np

In [2]:
songs = pd.read_csv('data/rock.csv')
songs.head()

Unnamed: 0,Song Clean,ARTIST CLEAN,Release Year,COMBINED,First?,Year?,PlayCount,F*G
0,Caught Up in You,.38 Special,1982.0,Caught Up in You by .38 Special,1,1,82,82
1,Fantasy Girl,.38 Special,,Fantasy Girl by .38 Special,1,0,3,0
2,Hold On Loosely,.38 Special,1981.0,Hold On Loosely by .38 Special,1,1,85,85
3,Rockin' Into the Night,.38 Special,1980.0,Rockin' Into the Night by .38 Special,1,1,18,18
4,Art For Arts Sake,10cc,1975.0,Art For Arts Sake by 10cc,1,1,1,1


In [8]:
songs = pd.read_csv(
    'data/rock.csv',
    usecols=['Song Clean', 'ARTIST CLEAN', 'Release Year', 'PlayCount'],
    converters={'Release Year': lambda x: int(x) if x.isdigit() and int(x) > 1900 else np.nan}
)
songs = songs.rename(columns={
        'Song Clean': 'title',
        'ARTIST CLEAN': 'artist',
        'Release Year': 'year',
        'PlayCount': 'count'})
songs.head()

Unnamed: 0,title,artist,year,count
0,Caught Up in You,.38 Special,1982.0,82
1,Fantasy Girl,.38 Special,,3
2,Hold On Loosely,.38 Special,1981.0,85
3,Rockin' Into the Night,.38 Special,1980.0,18
4,Art For Arts Sake,10cc,1975.0,1


In [12]:
# Get number of songs released in year 1981
released_in_1981 = songs['year'] == 1989
print 'There were {} songs released in 1981'.format(
    len(songs[released_in_1981])
)

There were 32 songs released in 1981


In [5]:
# Get number of songs released before 1984
before_1984 = songs['year'] < 1984
print 'There were {} songs released before 1984'.format(
    len(songs[before_1984])
)


There were 1218 songs released before 1984


In [13]:
# Earliest release year
print 'The first rock song was written in: {:.0f}'.format(
    songs['year'].min()
)

The first rock song was written in: 1955


In [15]:
# Top 20 songs by play count
top_20 = songs.sort_values('count', ascending=False)[:20]
top_20[ ['title', 'count'] ]

Unnamed: 0,title,count
49,Dream On,142
868,All Along the Watchtower,141
65,Sweet Emotion,141
38,You Shook Me All Night Long,138
267,More Than a Feeling,134
968,Carry On Wayward Son,134
269,Peace of Mind,132
774,Crazy On You,125
2220,Legs,121
2224,Sharp Dressed Man,120


In [16]:
# Top 10 prolific artists
songs['artist'].value_counts()[:10]

The Beatles                      100
Led Zeppelin                      69
Rolling Stones                    55
Van Halen                         44
Pink Floyd                        39
Aerosmith                         31
The Who                           31
Tom Petty & The Heartbreakers     29
AC/DC                             29
Bob Seger                         24
Name: artist, dtype: int64

In [17]:
# Number of different artists
uniques = songs['artist'].unique()
print 'There are {} different artists in this data set'.format(
    len(uniques)
)


There are 475 different artists in this data set


In [18]:
# Get all the songs with the word 'rock' in the title
mask = songs['title'].apply(lambda x: 'rock' in x.lower())
print songs[mask].head()
print '\n'
print 'In total, there are {} songs with the word "rock" in the title'.format(len(songs[mask]))

                      title       artist  year  count
3    Rockin' Into the Night  .38 Special  1980     18
13   CAN'T STOP ROCK'N'ROLL        AC/DC   NaN      5
15  For Those About To Rock        AC/DC  1981     46
17           Hard As A Rock        AC/DC  1995      1
23        Let There Be Rock        AC/DC  1977      3


In total, there are 60 songs with the word "rock" in the title


In [19]:
mask

0       False
1       False
2       False
3        True
4       False
5       False
6       False
7       False
8       False
9       False
10      False
11      False
12      False
13       True
14      False
15       True
16      False
17       True
18      False
19      False
20      False
21      False
22      False
23       True
24      False
25      False
26      False
27      False
28       True
29      False
        ...  
2200    False
2201    False
2202    False
2203    False
2204    False
2205    False
2206    False
2207    False
2208    False
2209    False
2210    False
2211    False
2212    False
2213    False
2214    False
2215    False
2216    False
2217    False
2218    False
2219    False
2220    False
2221    False
2222    False
2223    False
2224    False
2225    False
2226    False
2227    False
2228    False
2229    False
Name: title, dtype: bool