In [None]:
import pandas as pd
import numpy as np

def get_tenure_end(arg: str) -> int:
    """Return the end year encoded in a tenure string.

    The text is split on ``'-'``. When there is more than one field the
    second field is the end year; when there is only one field, that field
    is both the start and the end year.

    Args:
        arg: Tenure text, e.g. ``'1947-1964'`` or ``'1964'``.

    Returns:
        The end year as an ``int``.

    Raises:
        ValueError: If the selected field cannot be converted by ``int``.
    """
    # Keep at most the first two fields; the last of those is the end year
    # (the second field of a span, or the only field of a lone year).
    leading_fields = arg.split('-')[:2]
    return int(leading_fields[-1])
    
    
# Load the raw table. thousands=',' makes read_csv treat commas inside numeric
# fields as digit-group separators.
raw_csv = '/kaggle/input/indian-prime-ministers-performance-data/Indian_Prime_Miniters.csv'
df = pd.read_csv(filepath_or_buffer=raw_csv, thousands=',')

# Both spellings of the missing-value placeholder are converted to NaN.
missing_markers = {'Not available': np.nan, 'Not availabl': np.nan}
df = df.replace(to_replace=missing_markers)

# drop the last 4 rows because they are null
df = df.drop(index=df.index[-4:])

# Derive integer year columns from the 'Tenure' text: the start year is the
# field before the first '-', the end year comes from get_tenure_end.
df['Start Year'] = df['Tenure'].apply(func=lambda span: int(span.split('-')[0]))
df['End Year'] = df['Tenure'].apply(get_tenure_end)
df['Tenure (years)'] = df['End Year'] - df['Start Year']

# The mortality columns are text; the number is the first whitespace-separated
# token. Commas are stripped from the maternal figure before conversion.
df['infant mortality rate'] = df['Infant Mortality Rate'].apply(
    lambda cell: float(cell.split(maxsplit=1)[0])
)
df['maternal mortality rate'] = df['Maternal Mortality Rate'].apply(
    lambda cell: float(cell.split(maxsplit=1)[0].replace(',', ''))
)
df.head()

In [None]:
# Print a summary of df: column names, non-null counts and dtypes.
df.info()

In [None]:
from plotly.express import bar
# Bar chart of df: 'State' along the x-axis, bars colored by 'Political Party'.
# No y is passed, so bar heights are left to plotly's default for this call.
bar(
    data_frame=df,
    x='State',
    color='Political Party',
)

In [None]:
# Bar chart of df: 'Political Party' along the x-axis, bars colored by 'State'.
# No y is passed, so bar heights are left to plotly's default for this call.
bar(
    data_frame=df,
    x='Political Party',
    color='State',
)

In [None]:
from plotly.express import strip
# Strip plot with one point per row of df: party on x, state on y, colored by
# educational qualification; hovering shows the prime minister's name.
strip(
    data_frame=df,
    x='Political Party',
    y='State',
    color='Educational Qualification',
    hover_name='Prime Minister',
)

This plot captures more information and does a better job of treating state and party as independent variables, but it is not especially intuitive.

In [None]:
from plotly.express import scatter
# Population at the start of each tenure (x) against population at its end (y),
# colored by the number of universities recorded for that tenure.
scatter(
    data_frame=df,
    x='Population at Beginning of Tenure (in millions)',
    y='Population at End of Tenure (in millions)',
    color='Number of Universities in India during tenure',
    hover_name='Prime Minister',
)

There are several dynamics here that are difficult to observe directly: tenure length, changes in population growth, rate of opening new universities, and universities per million population; this graph really just shows the passage of time observed indirectly.

In [None]:
# Infant vs. maternal mortality rate (the parsed float columns), colored by the
# tenure's start year, with an ordinary-least-squares trendline overlaid.
scatter(
    data_frame=df,
    x='infant mortality rate',
    y='maternal mortality rate',
    color='Start Year',
    hover_name='Prime Minister',
    trendline='ols',
)

Infant mortality rate and maternal mortality rate are linearly correlated (our OLS fit has an $R^2$ of 0.82), and this graph shows both declining as time passes.