In [6]:
import pandas as pd
import numpy as np
from scipy.interpolate import PchipInterpolator

from bokeh.plotting import figure, show
from bokeh.io import curdoc, output_notebook
from bokeh.models import BoxZoomTool, ResetTool, PanTool, WheelZoomTool, HoverTool, AnnularWedge, ColumnDataSource, Legend, LegendItem, Plot, Range1d
from bokeh.palettes import PuBu, Inferno256, Category20

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_datetime_CAPI' (most likely due to a circular import)

In [5]:
# Read the raw purchase and encashment tables from the working directory.
purchase_csv, cash_csv = "./purchases2.csv", "./encashments2.csv"
df_purchase = pd.read_csv(purchase_csv)
df_cash = pd.read_csv(cash_csv)

NameError: name 'pd' is not defined

In [None]:
# Give the purchase table the shared "Date" / "Entity" column names.
df_purchase = df_purchase.rename(
    columns={'Date of\rPurchase': 'Date', 'Name of the Purchaser': 'Entity'}
)

# Drop rows whose "Sr No." cell repeats the column header text, then renumber.
purchase_header_rows = df_purchase["Sr No."] == "Sr No."
df_purchase = df_purchase.loc[~purchase_header_rows].reset_index(drop=True)

# UBN = Prefix followed by Bond Number; used below to match purchases to encashments.
df_purchase["UBN"] = df_purchase["Prefix"] + df_purchase["Bond\rNumber"]
df_purchase

In [None]:
# Give the encashment table the shared "Date" / "Entity" column names.
df_cash = df_cash.rename(
    columns={'Date of\rEncashment': 'Date', 'Name of the Political Party': 'Entity'}
)

# Drop rows whose "Sr No." cell repeats the column header text, then renumber.
cash_header_rows = df_cash["Sr No."] == "Sr No."
df_cash = df_cash.loc[~cash_header_rows].reset_index(drop=True)

# UBN = Prefix followed by Bond Number; used below to match encashments to purchases.
df_cash["UBN"] = df_cash["Prefix"] + df_cash["Bond\rNumber"]
df_cash

In [None]:
# Pair every purchase with its encashment via the shared UBN (pandas' default join type).
df_main = df_purchase.merge(df_cash, on="UBN")

# Columns present in both inputs come back suffixed _x (purchase) / _y (encashment).
df_main = df_main.rename(
    columns={
        'Date_x': 'Date_Purchase',
        'Date_y': 'Date_Cash',
        'Entity_x': 'Buyer',
        'Entity_y': 'Party',
        'Denominations_y': 'Denominations',
    }
)

main_columns = [
    "UBN",
    "Reference No  (URN)",
    "Date_Purchase",
    "Date_Cash",
    "Buyer",
    "Party",
    "Status",
    "Denominations",
]
df_main = df_main[main_columns]
df_main

In [None]:
# Purchases whose UBN never shows up in the encashment table.
cash_ubns = df_cash["UBN"].tolist()
was_cashed = df_purchase["UBN"].isin(cash_ubns)
not_cashed = df_purchase.loc[~was_cashed]
not_cashed

In [None]:
# Encashments whose UBN has no matching row in the purchase table.
purchase_ubns = df_purchase["UBN"].tolist()
has_purchase = df_cash["UBN"].isin(purchase_ubns)
purchase_missing = df_cash.loc[~has_purchase]
purchase_missing

In [None]:
# Per-party total of the encashments that have no matching purchase record.
# Denominations come out of the CSV as comma-grouped text, and summing text
# concatenates it instead of adding it, so convert to numbers first. The same
# scaling as create_timeseries (divide by 10,000,000) keeps the units consistent.
missing_amounts = purchase_missing["Denominations"]
if not pd.api.types.is_numeric_dtype(missing_amounts):
    missing_amounts = (
        missing_amounts.astype(str).str.replace(",", "", regex=False).astype(float) / 10000000
    )

parties = (
    purchase_missing[["Entity"]]
    .assign(Denominations=missing_amounts)
    .groupby("Entity")
    .sum()
)
parties

In [None]:
def create_timeseries(df):
    """Build a per-date, per-entity series of amounts with a running total.

    Side effect (kept on purpose, later cells reuse the normalised frame):
    ``df`` is modified in place. The "(POLITICAL PARTY)" tag is stripped from
    ``Entity``, and text ``Denominations`` are turned into floats divided by
    10,000,000. A ``Denominations`` column that is already numeric is assumed
    to have been normalised by an earlier call and is left untouched, so the
    cell can be re-run safely.

    Args:
        df: DataFrame with "Date", "Entity" and "Denominations" columns.

    Returns:
        DataFrame with columns "Date", "Entity", "Denominations" (sum per
        date and entity) and "Cumulative" (running sum per entity), ordered
        chronologically. "Date" values are returned unchanged.
    """
    df["Entity"] = df["Entity"].str.replace("(POLITICAL PARTY)", "", regex=False).str.strip()

    if not pd.api.types.is_numeric_dtype(df["Denominations"]):
        df["Denominations"] = (
            df["Denominations"].astype(str).str.replace(",", "", regex=False).astype(float) / 10000000
        )

    series = df.groupby(["Date", "Entity"], as_index=False)["Denominations"].sum()

    # "Date" is text, so a plain sort is lexical and the running total would
    # accumulate out of order. Sort on the parsed date instead; values that
    # cannot be parsed go last, and the stable sort keeps their existing order.
    # NOTE(review): parsing relies on pandas' default format inference - confirm
    # it matches the CSV's date format.
    series = series.sort_values(
        by="Date",
        key=lambda dates: pd.to_datetime(dates, errors="coerce"),
        kind="mergesort",
    )
    series["Cumulative"] = series.groupby("Entity")["Denominations"].cumsum()
    return series

In [None]:
def create_aggregate(df):
    """Total ``Denominations`` per entity, largest first.

    Works on a copy, so the caller's frame is not modified. Text
    ``Denominations`` are converted to floats divided by 10,000,000 (the same
    scaling create_timeseries applies); a column that is already numeric is
    summed as-is.

    Args:
        df: DataFrame with "Entity" and "Denominations" columns.

    Returns:
        DataFrame with columns "Entity" and "Denominations", sorted by
        "Denominations" in descending order.
    """
    work = df[["Entity", "Denominations"]].copy()
    work["Entity"] = work["Entity"].str.replace("(POLITICAL PARTY)", "", regex=False).str.strip()

    # Summing raw text would concatenate the strings rather than add amounts.
    if not pd.api.types.is_numeric_dtype(work["Denominations"]):
        work["Denominations"] = (
            work["Denominations"].astype(str).str.replace(",", "", regex=False).astype(float) / 10000000
        )

    totals = work.groupby("Entity", as_index=False)["Denominations"].sum()
    return totals.sort_values(by="Denominations", ascending=False)

In [None]:
def plot_scatter(x, y, title='Scatter Plot of X and Y Coordinates', x_label='X', y_label='Y', color="cyan", color_map=None, size_map=None):
    """Show a Bokeh scatter plot of ``y`` against ``x`` with a straight-line fit.

    Args:
        x, y: Coordinates of the points; also the inputs to the degree-1 fit.
        title: Figure title.
        x_label, y_label: Axis labels.
        color: Marker colour used when ``color_map`` is None.
        color_map: Optional marker colours; takes precedence over ``color``.
        size_map: Optional marker sizes; markers use size 8 when None.

    Returns:
        None. The figure is displayed in the notebook.
    """
    output_notebook()
    curdoc().theme = 'dark_minimal'

    marker_color = color if color_map is None else color_map
    marker_size = 8 if size_map is None else size_map

    p = figure(width=800, height=600, title=title)
    p.scatter(x, y, size=marker_size, color=marker_color, alpha=0.5, legend_label='Data')

    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.grid.visible = True

    # Degree-1 least-squares fit, drawn over the same x values as the points.
    slope, intercept = np.polyfit(x, y, 1)
    p.line(x, slope * x + intercept, line_color='blue', line_width=1, legend_label='Trendline')

    plot_tools = (
        HoverTool(tooltips=[('X', '@x'), ('Y', '@y')]),
        PanTool(),
        BoxZoomTool(),
        WheelZoomTool(),
        ResetTool(),
    )
    for tool in plot_tools:
        p.add_tools(tool)
    show(p)

In [None]:
def plot_lines(df, x, y, cat, title='Line Chart', x_label='X', y_label='Y', color_map=None, top_n=20):
    """Show one Bokeh line per category, limited to the largest categories.

    Categories are ranked by the sum of column ``y``; only the ``top_n``
    highest are plotted. Each line connects that category's rows in the order
    they appear in ``df``.

    Args:
        df: DataFrame holding the ``x``, ``y`` and ``cat`` columns.
        x: Name of the column used for the horizontal axis.
        y: Name of the column used for the vertical axis and for ranking.
        cat: Name of the column that identifies each line.
        title: Figure title.
        x_label, y_label: Axis labels.
        color_map: Optional sequence of colours, cycled when there are more
            lines than colours. When None or empty, Bokeh's Category20
            palette is used.
        top_n: Number of categories to keep (default 20).

    Returns:
        None. The figure is displayed in the notebook.
    """
    output_notebook()
    curdoc().theme = 'dark_minimal'

    # Use a datetime axis only when the column really holds datetimes;
    # otherwise keep Bokeh's default axis selection.
    x_axis_type = "datetime" if pd.api.types.is_datetime64_any_dtype(df[x]) else "auto"
    p = figure(width=1200, height=600, title=title, x_axis_type=x_axis_type)
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label

    totals = df.groupby(cat)[y].sum().reset_index()
    top_categories = totals.nlargest(top_n, y)[cat]
    df = df[df[cat].isin(top_categories)]

    categories = df[cat].unique()

    # Always take the full 20-colour palette and cycle through it: indexing
    # Category20 by the number of categories fails for counts the palette
    # dictionary has no entry for (fewer than 3 per Bokeh's palette docs).
    if color_map is not None and len(color_map) > 0:
        colors = list(color_map)
    else:
        colors = Category20[20]

    for i, category in enumerate(categories):
        category_data = df[df[cat] == category]
        p.line(
            category_data[x],
            category_data[y],
            line_color=colors[i % len(colors)],
            line_width=2,
            legend_label=str(category),
        )

    # No lines means no legend to style or move.
    if len(p.legend) > 0:
        p.legend.spacing = 0
        p.legend.margin = 0
        p.legend.padding = 0
        p.legend.label_text_font_size = '6pt'
        p.legend.click_policy = 'hide'
        p.add_layout(p.legend[0], 'right')

    p.add_tools(PanTool(), BoxZoomTool(), WheelZoomTool(), ResetTool())
    show(p)

In [None]:
# Per-date purchase series. create_timeseries also rewrites df_purchase's
# Entity and Denominations columns in place.
df_purchase_time = df_purchase.pipe(create_timeseries)
df_purchase_time

In [None]:
# Per-date encashment series. create_timeseries also rewrites df_cash's
# Entity and Denominations columns in place.
df_cash_time = df_cash.pipe(create_timeseries)
df_cash_time

In [None]:
# Total purchased per entity. Run after the create_timeseries cell for
# df_purchase, which converts its Denominations to floats in place.
df_purchase_agg = df_purchase.pipe(create_aggregate)
df_purchase_agg

In [None]:
# Total encashed per entity. Run after the create_timeseries cell for
# df_cash, which converts its Denominations to floats in place.
df_cash_agg = df_cash.pipe(create_aggregate)
df_cash_agg

In [None]:
# Write each derived table next to the notebook, without the index column.
csv_exports = {
    "./df_purchase_time.csv": df_purchase_time,
    "./df_cash_time.csv": df_cash_time,
    "./df_purchase_agg.csv": df_purchase_agg,
    "./df_cash_agg.csv": df_cash_agg,
}
for csv_path, export_frame in csv_exports.items():
    export_frame.to_csv(csv_path, index=False)

In [None]:
# Line colours handed to plot_lines, which assigns them to categories in order.
# Every entry must be unique: a repeated hex value would give two lines the
# same colour and make them indistinguishable.
color_map = [
    "#7F00FF",  # Purple
    "#FF00FF",  # Magenta
    "#FF0000",  # Red
    "#FF8C00",  # Dark Orange
    "#00FFFF",  # Cyan
    "#F5F5DC",  # Beige
    "#0000FF",  # Blue
    "#C0C0C0",  # Silver
    "#808080",  # Gray
    "#FAEBD7",  # Antique White
    "#008080",  # Teal (was a second "#00FFFF", identical to Cyan above)
    "#FFE4C4",  # Bisque
    "#F0F8FF",  # Alice Blue
    "#FFA500",  # Orange
    "#F0FFFF",  # Azure
    "#008000",  # Green
    "#FFEBCD",  # Blanched Almond
    "#7FFFD4",  # Aquamarine
    "#8A2BE2",  # Blue Violet
    "#A52A2A",  # Brown
    "#DEB887",  # Burly Wood
    "#5F9EA0",  # Cadet Blue
    "#7FFF00",  # Chartreuse
    "#D2691E",  # Chocolate
]

In [None]:
# Amount encashed per date, one line per party.
plot_lines(
    df_cash_time,
    x="Date",
    y="Denominations",
    cat="Entity",
    title="Annual Encashments by Party",
    color_map=color_map,
)

In [None]:
# Running total encashed, one line per party.
plot_lines(
    df_cash_time,
    x="Date",
    y="Cumulative",
    cat="Entity",
    title="Cumulative Encashments by Party",
    color_map=color_map,
)

In [None]:
# Amount purchased per date, one line per purchasing entity.
plot_lines(
    df_purchase_time,
    x="Date",
    y="Denominations",
    cat="Entity",
    title="Annual Donations by Entity",
    color_map=color_map,
)

In [None]:
# Running total purchased, one line per purchasing entity.
plot_lines(
    df_purchase_time,
    x="Date",
    y="Cumulative",
    cat="Entity",
    title="Cumulative Donations by Entity",
    color_map=color_map,
)