In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [None]:
# Load the jazz standards table from the CSV file in the Kaggle input directory.
JazzStandards = pd.read_csv("/kaggle/input/jazz-standards/JazzStandards.csv")

# Preview the first rows of the loaded table.
JazzStandards.head()

# Year
## When were most of the jazz standards published?

### Top 10 Years

In [None]:
# Number of standards per year, largest first, limited to the ten busiest years.
(
    JazzStandards
    .groupby('Year')['Year']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

Grouping and sorting by year shows that **1938** and **1941** are the years in which the most standards were published, with **41** standards each. They are followed by **1937**, **1936**, and **1944**, with **37**, **36**, and **35** standards respectively.

### A list of the standards that were published in 1938:

In [None]:
# Titles of every standard whose Year field equals the string '1938'.
songs_1938 = JazzStandards.loc[JazzStandards['Year'] == '1938', 'Title'].tolist()

print('Standards published in 1938:')
print()
# One bullet line per title.
for title in songs_1938:
    print('- ' + title)

### A list of the standards that were published in 1941:

In [None]:
# Titles of every standard whose Year field equals the string '1941'.
songs_1941 = JazzStandards.loc[JazzStandards['Year'] == '1941', 'Title'].tolist()

print('Standards published in 1941:')
print()
# One bullet line per title.
for title in songs_1941:
    print('- ' + title)

In [None]:
# Year labels of the ten years with the most standards, wrapped in a Series.
top10_counts = (
    JazzStandards
    .groupby('Year')['Year']
    .count()
    .sort_values(ascending=False)
    .head(10)
)
Top10Years = pd.Series(top10_counts.index)

# min()/max() run on the year labels as stored; int() is applied only for the subtraction.
first_year = Top10Years.min()
last_year = Top10Years.max()

print('Minimum:', first_year)
print('Maximum:', last_year)
print('Range: ' + str(int(last_year) - int(first_year)))

Looking at the minimum and maximum of the top 10 years reveals that most of the well-known jazz standards were published between **1928** and **1944**, a range of **16** years.

## Visualizing Years

In [None]:
# Rows to plot: Year is not the 'rad.' marker and sorts strictly between '1910' and '1969'.
year_col = JazzStandards['Year']
plot_window = (year_col != 'rad.') & (year_col > '1910') & (year_col < '1969')

# Standards per year inside that window; the index supplies the x values, the counts the y values.
yearly_counts = JazzStandards[plot_window].groupby('Year')['Year'].count()

x_axis = yearly_counts.index
y_axis = yearly_counts

In [None]:
# Line chart of the number of standards published per year (x_axis / y_axis).

# The bundled 'seaborn' style was renamed 'seaborn-v0_8' in Matplotlib 3.6 and the
# old name no longer exists in current releases; use whichever name is available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

plt.figure(figsize=(24, 12))

plt.plot(x_axis, y_axis, color='red', linewidth=3, marker='o', markersize=5, mfc='k')

# No x-axis label is set; the tick labels are the year values themselves.
plt.title('Standards by Year', fontsize=40)
plt.ylabel('# of Standards Published', fontsize=25)

plt.xticks(rotation=90, fontsize=20)
plt.yticks(range(0, 43, 3), fontsize=20)

# The on/off flag is passed positionally because its keyword was renamed from
# `b` to `visible` in Matplotlib 3.5; the positional form works on both sides.
plt.grid(True, color='k', linewidth=0.2)

plt.show()

# Composers
## Composers with the most compositions

In [None]:
# Split each "A, B, C" credit string into a list of individual composer names.
composer_lists = JazzStandards['Composer(s)'].map(lambda credit: credit.split(', '))

In [None]:
# Empty tally keyed by composer name; the counting loop in the following cell fills it in.
composers = {}

In [None]:
# Tally how many standards each composer is credited on.
# The tally is rebuilt from scratch here so re-running this cell cannot double-count,
# and the loop walks every row of composer_lists instead of assuming exactly 1000 rows
# (a shorter table raised KeyError, a longer one was silently truncated).
composers = {}
for credited_names in composer_lists:
    for composer in credited_names:
        # dict.get supplies 0 the first time a name is seen.
        composers[composer] = composers.get(composer, 0) + 1

### Top 10 Composers

In [None]:
# Two-column table (composer, count) ordered from most to fewest compositions,
# re-indexed 0..n-1 so that row labels double as ranks.
composers_df = (
    pd.DataFrame(composers.items(), columns=['Composer', 'Num_Compositions'])
    .sort_values('Num_Compositions', ascending=False)
    .reset_index(drop=True)
)
composers_df.head(10)

In [None]:
print('Top 10 Composers with the most compositions in this dataset are:')
print()
# Ranks are 1-based; the matching row label in composers_df is rank - 1.
for rank in range(1, 11):
    row_label = rank - 1
    composer_name = composers_df.loc[row_label, 'Composer']
    n_compositions = composers_df.loc[row_label, 'Num_Compositions']
    print(str(rank) + '- ' + composer_name + ' with ' + str(n_compositions) + ' compositions')

## Visualizing Composers

In [None]:
# Horizontal bar chart of the 30 composers with the most compositions.

# The bundled 'seaborn' style was renamed 'seaborn-v0_8' in Matplotlib 3.6 and the
# old name no longer exists in current releases; use whichever name is available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

top_composers = composers_df.head(30)
plt.barh(top_composers['Composer'], top_composers['Num_Compositions'])

plt.title('Standards by the Composer', fontsize=20)
# barh puts the names on the y-axis and the bar lengths (counts) on the x-axis,
# so the count label belongs on x and the name label on y.
plt.xlabel('# of Compositions', fontsize=15)
plt.ylabel('Composer', fontsize=15)

plt.show()

## Compositions by the top 5 composers:
- **Duke Ellington**
- **Richard Rodgers**
- **Cole Porter**
- **George Gershwin**
- **Irving Berlin**

### Duke Ellington Compositions

In [None]:
# Titles of every standard whose composer credit mentions Duke Ellington.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Duke_compositions = JazzStandards.loc[
    JazzStandards['Composer(s)'].str.contains('Duke Ellington', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the compositions by **Duke Ellington**:

In [None]:
# Full rows for every standard whose title is in Duke_compositions.
JazzStandards.loc[JazzStandards['Title'].isin(Duke_compositions)]

### Richard Rodgers Compositions

Here is a list of the compositions by **Richard Rodgers**:

In [None]:
# Titles of every standard whose composer credit mentions Richard Rodgers.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Rodgers_compositions = JazzStandards.loc[
    JazzStandards['Composer(s)'].str.contains('Richard Rodgers', regex=False, na=False),
    'Title',
].tolist()

In [None]:
# Full rows for every standard whose title is in Rodgers_compositions.
JazzStandards.loc[JazzStandards['Title'].isin(Rodgers_compositions)]

### Cole Porter Compositions

In [None]:
# Titles of every standard whose composer credit mentions Cole Porter.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Porter_compositions = JazzStandards.loc[
    JazzStandards['Composer(s)'].str.contains('Cole Porter', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the compositions by **Cole Porter**:

In [None]:
# Full rows for every standard whose title is in Porter_compositions.
JazzStandards.loc[JazzStandards['Title'].isin(Porter_compositions)]

### George Gershwin Compositions

Here is a list of the compositions by **George Gershwin**:

In [None]:
# Titles of every standard whose composer credit mentions George Gershwin.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Gershwin_compositions = JazzStandards.loc[
    JazzStandards['Composer(s)'].str.contains('George Gershwin', regex=False, na=False),
    'Title',
].tolist()

In [None]:
# Full rows for every standard whose title is in Gershwin_compositions.
JazzStandards.loc[JazzStandards['Title'].isin(Gershwin_compositions)]

### Irving Berlin Compositions

Here is a list of the compositions by **Irving Berlin**:

In [None]:
# Titles of every standard whose composer credit mentions Irving Berlin.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Berlin_compositions = JazzStandards.loc[
    JazzStandards['Composer(s)'].str.contains('Irving Berlin', regex=False, na=False),
    'Title',
].tolist()

In [None]:
# Full rows for every standard whose title is in Berlin_compositions.
JazzStandards.loc[JazzStandards['Title'].isin(Berlin_compositions)]

# Lyrics
## Lyricists with the most lyrics

In [None]:
# Split each "A, B, C" credit string into a list of individual lyricist names.
lyrics_lists = JazzStandards['Lyricist(s)'].map(lambda credit: credit.split(', '))

In [None]:
# Empty tally keyed by lyricist name; the counting loop in the following cell fills it in.
lyricists = {}

In [None]:
# Tally how many standards each lyricist is credited on.
# The tally is rebuilt from scratch here so re-running this cell cannot double-count,
# and the loop walks every row of lyrics_lists instead of assuming exactly 1000 rows
# (a shorter table raised KeyError, a longer one was silently truncated).
lyricists = {}
for credited_names in lyrics_lists:
    for lyricist in credited_names:
        # dict.get supplies 0 the first time a name is seen.
        lyricists[lyricist] = lyricists.get(lyricist, 0) + 1

## Top 10 Lyricists

In [None]:
# Two-column table (lyricist, count) ordered from most to fewest lyrics,
# re-indexed 0..n-1 so that row labels double as ranks.
lyrics_df = (
    pd.DataFrame(lyricists.items(), columns=['Lyricist', 'Num_Lyrics'])
    .sort_values('Num_Lyrics', ascending=False)
    .reset_index(drop=True)
)
lyrics_df.head(10)

In [None]:
print('Top 10 Lyricists with the most lyrics in this dataset are:')
print()

# Ranks are 1-based; the matching row label in lyrics_df is rank - 1.
for rank in range(1, 11):
    row_label = rank - 1
    lyricist_name = lyrics_df.loc[row_label, 'Lyricist']
    n_lyrics = lyrics_df.loc[row_label, 'Num_Lyrics']
    print(str(rank) + '- ' + lyricist_name + ' with ' + str(n_lyrics) + ' lyrics')

## Visualizing Lyricists

In [None]:
# Horizontal bar chart of the 30 lyricists with the most lyrics.

# The bundled 'seaborn' style was renamed 'seaborn-v0_8' in Matplotlib 3.6 and the
# old name no longer exists in current releases; use whichever name is available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

top_lyricists = lyrics_df.head(30)
plt.barh(top_lyricists['Lyricist'], top_lyricists['Num_Lyrics'])

plt.title('Standards by the Lyricist', fontsize=20)
# barh puts the names on the y-axis and the bar lengths (counts) on the x-axis,
# so the count label belongs on x and the name label on y.
plt.xlabel('# of Lyrics', fontsize=15)
plt.ylabel('Lyricist', fontsize=15)

plt.xticks(rotation=90)


plt.show()

## Lyrics by the top 5 lyricists:
- **Johnny Mercer**
- **Lorenz Hart**
- **Cole Porter**
- **Ira Gershwin**
- **Irving Berlin**

### Lyrics written by Johnny Mercer

In [None]:
# Titles of every standard whose lyricist credit mentions Johnny Mercer.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Mercer_lyrics = JazzStandards.loc[
    JazzStandards['Lyricist(s)'].str.contains('Johnny Mercer', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the standards whose lyrics were written by **Johnny Mercer**:

In [None]:
# Full rows for every standard whose title is in Mercer_lyrics.
JazzStandards.loc[JazzStandards['Title'].isin(Mercer_lyrics)]

### Lyrics written by Lorenz Hart

In [None]:
# Titles of every standard whose lyricist credit mentions Lorenz Hart.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Hart_lyrics = JazzStandards.loc[
    JazzStandards['Lyricist(s)'].str.contains('Lorenz Hart', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the standards whose lyrics were written by **Lorenz Hart**:

In [None]:
# Full rows for every standard whose title is in Hart_lyrics.
JazzStandards.loc[JazzStandards['Title'].isin(Hart_lyrics)]

### Lyrics written by Cole Porter

In [None]:
# Titles of every standard whose lyricist credit mentions Cole Porter.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Porter_lyrics = JazzStandards.loc[
    JazzStandards['Lyricist(s)'].str.contains('Cole Porter', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the standards whose lyrics were written by **Cole Porter**:

In [None]:
# Full rows for every standard whose title is in Porter_lyrics.
JazzStandards.loc[JazzStandards['Title'].isin(Porter_lyrics)]

### Lyrics written by Ira Gershwin

In [None]:
# Titles of every standard whose lyricist credit mentions Ira Gershwin.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Gershwin_lyrics = JazzStandards.loc[
    JazzStandards['Lyricist(s)'].str.contains('Ira Gershwin', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the standards whose lyrics were written by **Ira Gershwin**:

In [None]:
# Full rows for every standard whose title is in Gershwin_lyrics.
JazzStandards.loc[JazzStandards['Title'].isin(Gershwin_lyrics)]

### Lyrics written by Irving Berlin

In [None]:
# Titles of every standard whose lyricist credit mentions Irving Berlin.
# A literal substring match over the whole column replaces the range(1000) loop,
# which raised KeyError on a shorter table and ignored rows past 1000 on a longer one.
# na=False makes a missing credit count as "no match".
Berlin_lyrics = JazzStandards.loc[
    JazzStandards['Lyricist(s)'].str.contains('Irving Berlin', regex=False, na=False),
    'Title',
].tolist()

Here is a list of the standards whose lyrics were written by **Irving Berlin**:

In [None]:
# Full rows for every standard whose title is in Berlin_lyrics.
JazzStandards.loc[JazzStandards['Title'].isin(Berlin_lyrics)]