# The "Tarantino Dataset" by FiveThirtyEight 
### A Complete Catalog Of Every Time Someone Cursed Or Was Killed In A Quentin Tarantino Movie
Data source: https://www.kaggle.com/datasets/fivethirtyeight/cuss-words-and-deaths-in-quentin-tarantino-films

In [None]:
# Notebook setup: imports plus global warning and plotting configuration.
# NOTE(review): `np` and `re` are not used in any of the visible cells.
import pandas as pd
import numpy as np 
import re   
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')  
import matplotlib.pyplot as plt
import seaborn as sns
# minmax_scale is used further down to rescale each film's `minutes_in`.
from sklearn.preprocessing import minmax_scale 
# Default figure size (width, height) for the plots drawn below.
sns.set(rc={'figure.figsize':(17,7)}) 
plt.style.use('fivethirtyeight')  # apply matplotlib's 'fivethirtyeight' style sheet

In [None]:
# Load the catalog of swears and deaths; the CSV path is relative to the
# notebook's working directory.
df = pd.read_csv('tarantino_csv.csv')
# Replace missing values with empty strings so later equality / isin filters
# never have to deal with NaN (presumably `word` is blank on death rows --
# verify against the CSV).
df = df.fillna('')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only append a stray "None" to the output.
df.info()
df

In [None]:
# Count how often each swear word occurs (death rows excluded) and keep the
# nine most frequent ones; value_counts() already sorts by descending count.
top_words = df.loc[df['type'] == 'word', 'word'].value_counts().head(9)
top_words

In [None]:
# Rows of the full dataset whose word is one of the top swear words.
is_top_word = df['word'].isin(top_words.index)
dfWords = df[is_top_word]
dfWords.head()

In [None]:
# Number of swear rows per film, ordered from the most to the least profane.
wgroup = (
    df[df['type'] == 'word']
    .groupby('movie', as_index=False)['type']
    .count()
    .rename(columns={'type': 'swears count'})
    .sort_values('swears count', ascending=False)
)

# The bars follow the row order of `wgroup`, i.e. descending swear count.
sns.barplot(data=wgroup, x='movie', y='swears count')

In [None]:
# One point per occurrence of a top swear word, placed at its minute mark
# within the film and coloured by the word itself.
ax = sns.swarmplot(
    data=dfWords,
    x="minutes_in",
    y="movie",
    hue='word',
    size=3,
)
ax.set_title("SWEAR DISTRIBUTION")

In [None]:
# Histogram of when swears occur (all words, not just the top ones),
# with one colour per film.
swears_only = df[df['type'] == 'word']
ax = sns.histplot(data=swears_only, x="minutes_in", hue='movie', bins=35)
ax.set_title("SWEAR COUNTS HISTOGRAM")

In [None]:
# One figure per top swear word: box plot of when that word is spoken in
# each film.
for swear in dfWords['word'].unique():
    occurrences = df[df['word'] == swear]
    # Passing the word as the figure label gives every word its own figure.
    plt.figure(swear)
    ax = sns.boxplot(data=occurrences, x='minutes_in', y='movie')
    ax.set_title(f'"{swear.upper()}" PER FILM')

# DEATH

In [None]:
# Per-film row counts for death events, deadliest film first.
group = (
    df[df['type'] == 'death']
    .groupby('movie', as_index=False)
    .count()
    .sort_values('type', ascending=False)
)
# After count() every non-key column holds the number of rows per film;
# expose the `type` tally under a readable name for the plot axis.
group['death count'] = group['type']
sns.barplot(data=group, x='movie', y='death count')

In [None]:
# Every death event, plotted at the minute it happens in its film.
death = df[df['type'] == 'death']
ax = sns.swarmplot(data=death, x="minutes_in", y="movie")
ax.set_title('DEATH BY MINUTES IN')

In [None]:
# Histogram of when deaths occur, with one colour per film.
deaths_only = df[df['type'] == 'death']
ax = sns.histplot(data=deaths_only, x="minutes_in", hue='movie', bins=25)
ax.set_title("DEATH COUNTS ALL FILMS")

In [None]:
# Violin plot comparing the timing of the two event types (swears vs deaths)
# across all films.
sns.catplot(
    data=df,
    x="minutes_in",
    y='type',
    kind='violin',
)

In [None]:
# Every event in the dataset as a point, split into one row per event type.
ax = sns.swarmplot(data=df, x="minutes_in", y='type')
ax.set_title('DEATH/SWEAR CORRELATION SCATTER')

In [None]:
 
# Every event as a point on its film's timeline, coloured by event type.
ax = sns.swarmplot(
    data=df,
    x="minutes_in",
    y="movie",
    hue='type',
)
ax.set_title('DEATH/SWEAR CORRELATION SCATTER')
 

In [None]:
# Rescale each film's `minutes_in` to the range [0, 1] so that events can be
# compared by relative position within a film rather than by absolute minute.
dfn = df.copy()
# Initialise as float: the scaled values are floats, and writing them into an
# integer column relies on a silent upcast that recent pandas deprecates
# (the warning would be hidden by the notebook's blanket warning filter).
dfn['nminutes_in'] = 0.0
for m in dfn.movie.unique():
    # Compute the row mask once and reuse it for both the read and the write.
    in_movie = dfn.movie == m
    dfn.loc[in_movie, 'nminutes_in'] = minmax_scale(
        dfn.loc[in_movie, 'minutes_in'], feature_range=(0, 1)
    )

In [None]:
# Same per-film event scatter as before, but on the normalised [0, 1]
# timeline so films line up regardless of running time.
ax = sns.swarmplot(
    data=dfn,
    x="nminutes_in",
    y='movie',
    hue='type',
)
ax.set_title('NORMALIZED MINUTES IN')

In [None]:
# Event types compared on the normalised timeline: first as individual
# points, then as a violin plot (catplot draws on a figure of its own).
ax = sns.swarmplot(data=dfn, x="nminutes_in", y='type')
ax.set_title('NORMALIZED MINUTES IN')
sns.catplot(
    data=dfn,
    x="nminutes_in",
    y='type',
    kind='violin',
)