In [1]:
import pandas as pd
import altair as alt

import sys
from pathlib import Path

sys.path.append(str(Path.cwd().parent))

from src.cleaning import get_clean_data

from static_data import NORTH_STATES, SOUTH_STATES

In [2]:
# Load the dataset through the project's cleaning helper. The cells below read
# the `answer_text`, `year`, `primary_state` and `subject` columns from `df`.
df = get_clean_data()

In [3]:
# Questions whose answer text mentions Pakistan.
# na=False treats a missing answer as "no match"; without it a NaN in the mask
# makes the boolean indexing raise instead of filtering.
df_subject = df[df["answer_text"].str.contains("Pakistan", na=False)]

# One row per year with the number of matching questions.
num_questions_df = df_subject.groupby(["year"]).size().reset_index(name="num_questions")

num_questions_chart = alt.Chart(num_questions_df).mark_line(color="black", point={"filled":True, "color":"black"}).encode(
    x=alt.X("year:N", title="Year", axis=alt.Axis(tickCount=10, tickMinStep=5)),
    y=alt.Y("num_questions:Q", title="Number of Questions"),
).properties(
    title="How many questions in the Question Hour?",
    width=600
)

num_questions_chart

In [4]:
# Count the Pakistan-related questions per state (one row per state).
primary_state_df = (
    df_subject
    .groupby(["primary_state"])
    .size()
    .reset_index(name="num_questions")
)

# Horizontal bars: question count along x, state names along y.
count_axis = alt.X("num_questions:Q", title="Number of Questions")
state_axis = alt.Y("primary_state:N", title="State")

num_questions_per_state = (
    alt.Chart(primary_state_df)
    .mark_bar()
    .encode(x=count_axis, y=state_axis)
    .properties(title="How many questions in the Question Hour?", width=600)
)

num_questions_per_state

In [None]:
# Source: https://altair-viz.github.io/user_guide/interactions/bindings_widgets.html#data-driven-lookups
# Yearly question counts per state, browsed through a dropdown-bound selection.
search_by_state_df = df.groupby(["primary_state", "year"]).size().reset_index(name="num_questions")

# dropna() keeps sorted() from comparing a NaN (float) against state names,
# which would raise a TypeError.
states = sorted(df["primary_state"].dropna().unique().tolist())
input_dropdown = alt.binding_select(options=states, name='State')
selection = alt.selection_point(fields=['primary_state'], bind=input_dropdown)

search_by_state_chart = alt.Chart(search_by_state_df).mark_line().encode(
    x=alt.X('year:N', title="Year"),
    y=alt.Y('num_questions:Q', title="Number of Questions"),
    color=alt.value("steelblue"),
    # Group by state so that, while nothing is selected and the filter lets
    # every state through, each state gets its own line instead of all rows
    # being joined into a single zig-zag path.
    detail=alt.Detail('primary_state:N'),
).add_params(
    selection
).transform_filter(
    selection
).properties(
    width=700,
    title="How many Questions did MPs ask in the Lok Sabha from 2000-2018?"
)
search_by_state_chart

#search_by_state_chart.save("../img/draft/search_by_state.html", embed_options={'renderer': 'svg'})

In [11]:
# Keep only questions from states listed in either regional grouping.
in_north = df["primary_state"].isin(NORTH_STATES)
in_south = df["primary_state"].isin(SOUTH_STATES)
regional_questions = df[in_north | in_south]

# Collapse to one row per (year, state) holding the number of questions.
regional_questions = (
    regional_questions
    .groupby(["year", "primary_state"])
    .size()
    .reset_index(name="num_questions")
)

# Split the yearly counts back out by region.
is_south_row = regional_questions["primary_state"].isin(SOUTH_STATES)
is_north_row = regional_questions["primary_state"].isin(NORTH_STATES)
south_questions = regional_questions[is_south_row]
north_questions = regional_questions[is_north_row]

# Spot-check a single southern state.
south_questions[south_questions["primary_state"] == "Andhra Pradesh"]

Unnamed: 0,year,primary_state,num_questions
0,2000,Andhra Pradesh,1981
9,2001,Andhra Pradesh,2408
18,2002,Andhra Pradesh,2843
28,2003,Andhra Pradesh,2149
39,2004,Andhra Pradesh,887
51,2005,Andhra Pradesh,1710
63,2006,Andhra Pradesh,1398
75,2007,Andhra Pradesh,1523
87,2008,Andhra Pradesh,1065
99,2009,Andhra Pradesh,1457


In [None]:
def _regional_bar_chart(questions, region):
    """Build a horizontal bar chart of question counts per state for one region.

    Parameters
    ----------
    questions : DataFrame
        Rows providing the `primary_state` and `num_questions` fields that the
        chart encodes.
    region : str
        Region label ("North" or "South") used in the y-axis and chart titles.

    Returns
    -------
    alt.Chart
        Bar chart with the question count on x and the state on y.
    """
    return alt.Chart(questions).mark_bar(size=25).encode(
        x=alt.X(
            "num_questions:Q",
            axis=alt.Axis(labelAngle=0),
            # The x channel shows the count, not the year.
            title="Number of Questions",
            # Fixed domain keeps bar lengths comparable as the year changes.
            scale=alt.Scale(domain=[0, 3000], padding=0),
        ),
        y=alt.Y("primary_state:N", title=f"State ({region})"),
    ).properties(
        width=800,
        height=400,
        title=f"Questions by {region}ern State MPs",
    )


north_questions_chart = _regional_bar_chart(north_questions, "North")
south_questions_chart = _regional_bar_chart(south_questions, "South")

# Source: https://altair-viz.github.io/gallery/multiple_interactions.html
# The slider starts at 2000 so its range includes the initial value and the
# first year present in the data.
year_slider = alt.binding_range(min=2000, max=2018, step=1, name='Year')
slider_selection = alt.selection_point(bind=year_slider, fields=['year'], value={'year': 2000})

# Both charts share one selection, so a single slider filters them together.
north_slider_chart = north_questions_chart.add_params(
    slider_selection
).transform_filter(
    slider_selection
).properties(title='Number of Questions Asked')

south_slider_chart = south_questions_chart.add_params(
    slider_selection
).transform_filter(
    slider_selection
).properties(title='Number of Questions Asked')

combined = alt.vconcat(north_slider_chart, south_slider_chart)

combined.save("../img/draft/num_questions_by_state.html", embed_options={'renderer': 'svg'})

  exec(code_obj, self.user_global_ns, self.user_ns)


In [47]:
# Count questions whose subject mentions "independence", ignoring case.
# The match runs on a lower-cased copy, so `df` itself is left untouched.
temp = df.assign(subject=df["subject"].str.lower())
mentions_independence = temp["subject"].str.contains("independence")
sum(mentions_independence)

17