In [30]:
import pandas as pd
import numpy as np
from scipy.interpolate import PchipInterpolator

from bokeh.plotting import figure, show
from bokeh.io import curdoc, output_notebook
from bokeh.models import BoxZoomTool, ResetTool, PanTool, WheelZoomTool, HoverTool, AnnularWedge, ColumnDataSource, Legend, LegendItem, Plot, Range1d
from bokeh.palettes import PuBu, Inferno256, Category20

In [31]:
# Load the purchase and encashment records from CSV files located relative
# to the current working directory.
df_purchase = pd.read_csv("./purchases.csv")
df_cash = pd.read_csv("./encashments.csv")

In [32]:
# Normalise the column names to the shared Date / Entity schema and discard
# the "Unnamed: 3" column. The chain builds a new frame instead of editing in
# place, and errors="ignore" lets the cell be re-run after the column is
# already gone (the in-place drop raised KeyError on a second run).
df_purchase = (
    df_purchase
    .rename(columns={'Date of Purchase': 'Date', 'Purchaser Name': 'Entity'})
    .drop(columns="Unnamed: 3", errors="ignore")
)
df_purchase

Unnamed: 0,Date,Entity,Denomination
0,12/Apr/2019,A B C INDIA LIMITED,100000
1,12/Apr/2019,A B C INDIA LIMITED,100000
2,12/Apr/2019,A B C INDIA LIMITED,1000000
3,12/Apr/2019,A B C INDIA LIMITED,1000000
4,12/Apr/2019,A B C INDIA LIMITED,100000
...,...,...,...
18866,11/Jan/2024,WESTWELL GASES PRIVATE LIMITED,10000000
18867,11/Jan/2024,WESTWELL GASES PRIVATE LIMITED,10000000
18868,11/Jan/2024,WESTWELL GASES PRIVATE LIMITED,10000000
18869,11/Jan/2024,WESTWELL GASES PRIVATE LIMITED,10000000


In [33]:
# Normalise the column names to the shared Date / Entity schema and discard
# the "Unnamed: 3" column. The chain builds a new frame instead of editing in
# place, and errors="ignore" lets the cell be re-run after the column is
# already gone (the in-place drop raised KeyError on a second run).
df_cash = (
    df_cash
    .rename(columns={'Date of\nEncashment': 'Date', 'Name of the Political Party': 'Entity'})
    .drop(columns="Unnamed: 3", errors="ignore")
)
df_cash

Unnamed: 0,Date,Entity,Denomination
0,12/Apr/2019,ALL INDIA ANNA DRAVIDA MUNNETRA KAZHAGAM,1000000
1,12/Apr/2019,ALL INDIA ANNA DRAVIDA MUNNETRA KAZHAGAM,1000000
2,12/Apr/2019,ALL INDIA ANNA DRAVIDA MUNNETRA KAZHAGAM,10000000
3,12/Apr/2019,ALL INDIA ANNA DRAVIDA MUNNETRA KAZHAGAM,1000000
4,12/Apr/2019,ALL INDIA ANNA DRAVIDA MUNNETRA KAZHAGAM,1000000
...,...,...,...
20416,24/Jan/2024,JANASENA PARTY,1000000
20417,24/Jan/2024,JANASENA PARTY,1000000
20418,24/Jan/2024,JANASENA PARTY,1000000
20419,24/Jan/2024,JANASENA PARTY,1000000


In [34]:
def create_timeseries(df):
    """Aggregate transactions into one row per (month, entity) with a running total.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have the columns ``Date`` (parseable by ``pd.to_datetime``),
        ``Entity`` (strings) and ``Denomination`` (numeric or numeric strings).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (integer ``YYYYMM``), ``Entity``, ``Denomination``
        (sum for that month and entity, in units of 10,000,000 - one crore) and
        ``Cumulative`` (running sum of ``Denomination`` per entity in ``Date``
        order). Rows are sorted by ``Date``.

    Notes
    -----
    The three input columns of ``df`` are rewritten IN PLACE: ``Entity`` is
    cleaned, ``Denomination`` is divided by 10,000,000 and ``Date`` becomes the
    integer ``YYYYMM``. This side effect is kept deliberately because
    ``create_aggregate`` is later called on the same frames and does no
    rescaling of its own. It also means that calling this function twice on
    the same frame rescales the amounts a second time, so reload the CSV
    before re-running.
    """
    # Vectorised string cleanup; regex=False because the parentheses are literal.
    df["Entity"] = (
        df["Entity"].str.replace("(POLITICAL PARTY)", "", regex=False).str.strip()
    )
    df["Denomination"] = df["Denomination"].astype(float) / 10000000

    # Collapse each date to its calendar month encoded as the integer YYYYMM.
    parsed = pd.to_datetime(df["Date"])
    df["Date"] = (parsed.dt.year * 100 + parsed.dt.month).astype("int64")

    monthly = df.groupby(["Date", "Entity"], as_index=False)["Denomination"].sum()
    monthly = monthly.sort_values(by="Date")
    # After the groupby each entity has at most one row per month, so a
    # cumulative sum in Date order gives the running total for that entity.
    monthly["Cumulative"] = monthly.groupby("Entity")["Denomination"].cumsum()
    return monthly

In [35]:
def create_aggregate(df):
    """Total ``Denomination`` per entity, largest total first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have the columns ``Entity`` (strings) and ``Denomination``
        (numeric).

    Returns
    -------
    pandas.DataFrame
        Columns ``Entity`` and ``Denomination`` (sum over all rows of that
        entity), sorted by ``Denomination`` in descending order. The index
        holds each entity's position in alphabetical group order.

    Notes
    -----
    ``Denomination`` is summed as-is; no unit conversion happens here, so the
    totals are in whatever unit the column holds when the function is called.
    ``df["Entity"]`` is cleaned in place (the "(POLITICAL PARTY)" marker and
    surrounding whitespace are removed), as before.
    """
    # Vectorised string cleanup; regex=False because the parentheses are literal.
    df["Entity"] = (
        df["Entity"].str.replace("(POLITICAL PARTY)", "", regex=False).str.strip()
    )
    totals = df.groupby("Entity", as_index=False)["Denomination"].sum()
    return totals.sort_values(by="Denomination", ascending=False)

In [36]:
def plot_scatter(x, y, title='Scatter Plot of X and Y Coordinates', x_label='X', y_label='Y', color="cyan", color_map=None, size_map=None):
    """Show a Bokeh scatter plot of ``y`` against ``x`` with a linear trendline.

    Parameters
    ----------
    x, y : array-like of numbers
        Coordinates of the points; must have equal length.
    title, x_label, y_label : str
        Figure title and axis labels.
    color : str
        Marker colour used when ``color_map`` is not given.
    color_map : optional
        Per-point colours; takes precedence over ``color`` when not None.
    size_map : optional
        Per-point marker sizes; a fixed size of 8 is used when None.

    The figure is rendered inline in the notebook; nothing is returned.
    The trendline is a degree-1 least-squares fit and is skipped when there
    are fewer than two distinct finite x values, because no line can be fitted.
    """
    output_notebook()
    curdoc().theme = 'dark_minimal'

    # Request the tool set once, here. figure() already installs pan / zoom /
    # reset by default, so adding PanTool(), BoxZoomTool(), ... afterwards
    # put a second copy of each button on the toolbar.
    p = figure(width=800, height=600, title=title,
               tools="pan,wheel_zoom,box_zoom,save,reset,help")

    use_color = color_map if color_map is not None else color
    use_size = size_map if size_map is not None else 8
    p.scatter(x, y, size=use_size, color=use_color, alpha=0.5, legend_label='Data')

    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.grid.visible = True

    # Fit on finite points only: a single NaN would otherwise turn both
    # coefficients into NaN, and empty input makes np.polyfit raise.
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    finite = np.isfinite(xs) & np.isfinite(ys)
    if np.unique(xs[finite]).size >= 2:
        slope, intercept = np.polyfit(xs[finite], ys[finite], 1)
        trendline = slope * xs + intercept
        p.line(xs, trendline, line_color='blue', line_width=1, legend_label='Trendline')

    # Passing plain sequences to the glyph methods stores them under the
    # column names "x" and "y", which is what the tooltips refer to.
    hover = HoverTool(tooltips=[('X', '@x'), ('Y', '@y')])
    p.add_tools(hover)
    show(p)

In [37]:
def plot_lines(df, x, y, cat, title='Line Chart', x_label='X', y_label='Y', color_map=None):
    """Show one Bokeh line per category for the 20 largest categories.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data containing the columns named by ``x``, ``y``, ``cat``.
    x, y : str
        Column names for the horizontal and vertical coordinates.
    cat : str
        Column whose distinct values each become one line and legend entry.
    title, x_label, y_label : str
        Figure title and axis labels.
    color_map : sequence of colours, optional
        Colours assigned to the categories in order of first appearance and
        reused cyclically. When None or empty, the ``Category20`` palette is
        used.

    Categories are ranked by the sum of their ``y`` values and only the top 20
    are drawn. The figure is rendered inline in the notebook; nothing is
    returned.
    """
    output_notebook()
    curdoc().theme = 'dark_minimal'

    # Request the tool set once, here. figure() already installs pan / zoom /
    # reset by default, so adding PanTool(), BoxZoomTool(), ... afterwards
    # put a second copy of each button on the toolbar.
    p = figure(width=1200, height=600, title=title,
               tools="pan,wheel_zoom,box_zoom,save,reset,help")

    # These parameters were previously accepted but never applied.
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label

    aggregated_df = df.groupby(cat)[y].sum().reset_index()
    top_20_categories = aggregated_df.nlargest(20, y)[cat]
    df = df[df[cat].isin(top_20_categories)]

    categories = df[cat].unique()
    if color_map is not None and len(color_map) > 0:
        colors = color_map
    else:
        # Category20 only provides sizes 3..20; clamp so that one or two
        # categories no longer raise KeyError.
        colors = Category20[min(max(len(categories), 3), 20)]

    for i, category in enumerate(categories):
        category_data = df[df[cat] == category]
        p.line(category_data[x], category_data[y], line_color=colors[i % len(colors)], line_width=2, legend_label=category)

    # A legend exists only if at least one line was drawn; indexing
    # p.legend[0] on an empty plot raised IndexError.
    if len(categories) > 0:
        p.legend.spacing = 0
        p.legend.margin = 0
        p.legend.padding = 0
        p.legend.label_text_font_size = '6pt'
        p.legend.click_policy = 'hide'
        p.add_layout(p.legend[0], 'right')

    show(p)

In [38]:
# Per-month, per-purchaser totals with a running total per purchaser.
# NOTE: create_timeseries also rewrites df_purchase itself in place (Entity
# cleaned, Denomination divided by 10000000, Date turned into integer YYYYMM),
# so re-running this cell without reloading the CSV rescales the amounts again.
df_purchase_time = create_timeseries(df_purchase)
df_purchase_time

Unnamed: 0,Date,Entity,Denomination,Cumulative
0,201904,A B C INDIA LIMITED,0.4000,0.4000
91,201904,RAIPUR BOTLING COMPANY,1.0001,1.0001
92,201904,RAJEEV KUMAR JAIN,0.2000,0.2000
94,201904,RENUKA INVESTMENTS AND FINANCE LTD,5.0000,5.0000
95,201904,RENUKESHWAR INVESTMENTS PVT LTD,5.0000,5.0000
...,...,...,...,...
1792,202401,GVPR ENGINEERS LTD,5.0000,10.0000
1791,202401,GRANULES INDIA LIMITED,3.0000,3.0000
1790,202401,GK ENERGY MARKETERS PRIVATE LIMITED,6.0000,6.0000
1798,202401,JAINENDRA PRAVINCHANDRA SHAH,0.9000,0.9000


In [39]:
# Per-month, per-party totals with a running total per party.
# NOTE: create_timeseries also rewrites df_cash itself in place (Entity
# cleaned, Denomination divided by 10000000, Date turned into integer YYYYMM),
# so re-running this cell without reloading the CSV rescales the amounts again.
df_cash_time = create_timeseries(df_cash)
df_cash_time

Unnamed: 0,Date,Entity,Denomination,Cumulative
0,201904,AAM AADMI PARTY,0.20,0.2000
17,201904,YSR CONGRESS PARTY (YUVAJANA SRAMIKA R,8.25,8.2500
16,201904,TELUGU DESAM PARTY,7.30,7.3000
15,201904,SHIVSENA,14.63,14.6300
14,201904,SHIROMANI AKALI DAL,6.26,6.2600
...,...,...,...,...
181,202401,BHARTIYA JANTA PARTY,202.00,6060.5111
180,202401,ALL INDIA TRINAMOOL CONGRESS,130.45,1609.5314
189,202401,TELUGU DESAM PARTY,118.20,218.8800
184,202401,"PRESIDENT, ALL INDIA CONGRESS COMMITTEE",35.90,1421.8655


In [40]:
# Overall total per purchaser, largest first.
# create_aggregate does no unit conversion: the totals are in units of
# 10000000 only because create_timeseries(df_purchase) above already divided
# df_purchase["Denomination"] in place. This cell must run after that one.
df_purchase_agg = create_aggregate(df_purchase)
df_purchase_agg

Unnamed: 0,Entity,Denomination
354,FUTURE GAMING AND HOTEL SERVICES PR,1208.0000
652,MEGHA ENGINEERING AND INFRASTRUCTURES LI MITED,821.0000
866,QWIKSUPPLYCHAINPRIVATELIMITED,410.0000
417,HALDIA ENERGY LIMITED,377.0000
1248,VEDANTA LIMITED,375.6500
...,...,...
73,ANKUR SINGHAL S O SH ANIL SIN,0.0001
26,ADWITA FINVEST PRIVATE LIMITED,0.0001
579,KUNAL GUPTA,0.0001
718,N RAMAMOORTHY,0.0001


In [41]:
# Overall total per party, largest first.
# create_aggregate does no unit conversion: the totals are in units of
# 10000000 only because create_timeseries(df_cash) above already divided
# df_cash["Denomination"] in place. This cell must run after that one.
df_cash_agg = create_aggregate(df_cash)
df_cash_agg

Unnamed: 0,Entity,Denomination
5,BHARTIYA JANTA PARTY,6060.5111
3,ALL INDIA TRINAMOOL CONGRESS,1609.5314
17,"PRESIDENT, ALL INDIA CONGRESS COMMITTEE",1421.8655
4,BHARAT RASHTRA SAMITHI,1214.7099
7,BIJU JANATA DAL,775.5
8,DMK PARTY IN PARLIAMENT,639.0
25,YSR CONGRESS PARTY (YUVAJANA SRAMIKA R,337.0
24,TELUGU DESAM PARTY,218.88
21,SHIVSENA,159.3814
19,RASTRIYA JANTA DAL,72.5


In [42]:
# Write the four derived tables to CSV files in the current working
# directory; index=False leaves the DataFrame index out of the files.
df_purchase_time.to_csv("./df_purchase_time.csv", index=False)
df_cash_time.to_csv("./df_cash_time.csv", index=False)
df_purchase_agg.to_csv("./df_purchase_agg.csv", index=False)
df_cash_agg.to_csv("./df_cash_agg.csv", index=False)

In [43]:
# Line colours handed to plot_lines, assigned to categories in order.
# Every entry must be unique: the list previously contained "#00FFFF" twice
# (as Cyan and as Aqua), which drew two different series in the same colour.
color_map = [
    "#7F00FF",  # Purple
    "#FF00FF",  # Magenta
    "#FF0000",  # Red
    "#FF8C00",  # Dark Orange
    "#00FFFF",  # Cyan
    "#F5F5DC",  # Beige
    "#0000FF",  # Blue
    "#C0C0C0",  # Silver
    "#808080",  # Gray
    "#FAEBD7",  # Antique White
    "#FFFF00",  # Yellow (replaces Aqua, which is the same colour as Cyan)
    "#FFE4C4",  # Bisque
    "#F0F8FF",  # Alice Blue
    "#FFA500",  # Orange
    "#F0FFFF",  # Azure
    "#008000",  # Green
    "#FFEBCD",  # Blanched Almond
    "#7FFFD4",  # Aquamarine
    "#8A2BE2",  # Blue Violet
    "#A52A2A",  # Brown
    "#DEB887",  # Burly Wood
    "#5F9EA0",  # Cadet Blue
    "#7FFF00",  # Chartreuse
    "#D2691E",  # Chocolate
]

In [44]:
# df_cash_time is aggregated per calendar month (Date is YYYYMM), so each
# point is a monthly total; the title previously said "Annual".
plot_lines(df_cash_time, "Date", "Denomination", "Entity", color_map=color_map, title="Monthly Encashments by Party")

In [45]:
# Running total per party. plot_lines picks its top 20 by summing the plotted
# column, so here the ranking is by the sum of the running totals rather than
# by the final total.
plot_lines(df_cash_time, "Date", "Cumulative", "Entity", color_map=color_map, title="Cumulative Encashments by Party")

In [46]:
# df_purchase_time is aggregated per calendar month (Date is YYYYMM), so each
# point is a monthly total; the title previously said "Annual".
plot_lines(df_purchase_time, "Date", "Denomination", "Entity", color_map=color_map, title="Monthly Donations by Entity")

In [47]:
# Running total per purchaser. plot_lines picks its top 20 by summing the
# plotted column, so here the ranking is by the sum of the running totals
# rather than by the final total.
plot_lines(df_purchase_time, "Date", "Cumulative", "Entity", color_map=color_map, title="Cumulative Donations by Entity")