### <center>Exploring Hansard Data</center>

In this notebook we explore the text of the Hansard data for the years 2020 and 2021.

In [1]:
# Data handling, numerics and plotting libraries used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from warnings import filterwarnings
from pylab import rcParams
# Suppress DeprecationWarning messages so they do not clutter cell output.
filterwarnings(action='ignore', category=DeprecationWarning)

# NOTE(review): star import from the project's utils module; the names it
# brings into scope are not visible here -- prefer explicit imports once
# the required helpers are confirmed.
from utils.utils import *

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Image loading (PIL) and word-cloud rendering used for the speech word clouds.
from PIL import Image
from wordcloud import WordCloud

# Default matplotlib figure size (width, height in inches) for figures that
# do not pass their own figsize.
rcParams['figure.figsize'] = 12, 8

In [1]:
class HOCDataExploration:
    """Explore Hansard data for parliamentary speeches.

    Provides helpers to build and plot word clouds of the processed speech
    text and to plot the most frequent major headings (topics). Every plot
    is saved as a PNG file under ``base_path`` and then displayed.
    """

    # Directory (including the trailing slash) that holds the word-cloud mask
    # image and receives the saved plots.
    base_path = r'./assets/images/'

    def __init__(self, df_head_dt_gp_21, df_head_dt_gp_20):
        """Store the two dataframes to explore.

        Args:
            df_head_dt_gp_21: dataframe kept on the instance; presumably the
                2021 speeches -- confirm with the caller.
            df_head_dt_gp_20: dataframe kept on the instance; presumably the
                2020 speeches -- confirm with the caller.
        """
        self.df_head_dt_gp_21 = df_head_dt_gp_21
        self.df_head_dt_gp_20 = df_head_dt_gp_20

    def wordcloud_data(self, df: "pd.DataFrame") -> "WordCloud":
        """Build a word cloud from the ``speech_processed`` column of *df*.

        Every speech is split on whitespace and all tokens are joined into a
        single text, from which one word cloud is generated using the
        ``parliament_mask.jpg`` image in ``base_path`` as its mask.

        Args:
            df: dataframe with a ``speech_processed`` text column.

        Returns:
            The generated ``WordCloud`` object.

        Raises:
            ValueError: if *df* contains no words at all (no rows, only
                missing values, or only whitespace).
        """
        # Missing speeches (NaN/None) have no text to contribute.
        speeches = df['speech_processed'].dropna()
        parliament_words = ' '.join(
            token for text in speeches for token in str(text).split()
        )
        if not parliament_words:
            raise ValueError(
                "Cannot build a word cloud: 'speech_processed' contains no words."
            )

        mask = np.array(Image.open(f'{self.base_path}parliament_mask.jpg'))
        # Render once, from the complete text: generating a cloud is costly,
        # so it must not be repeated for every row.
        # NOTE: per the wordcloud documentation, width/height are ignored
        # when a mask is given (the mask's shape is used instead).
        return WordCloud(
            width=800,
            height=800,
            mask=mask,
            background_color='white',
            min_font_size=10,
        ).generate(parliament_words)

    def plot_word_cloud(self, wordcloud, file: str) -> None:
        """Plot a word cloud, save it as a PNG and show it.

        Args:
            wordcloud: word-cloud data, e.g. as returned by
                ``wordcloud_data``.
            file: image name without extension; it is used both in the
                figure title and as the file name ``<base_path><file>.png``.
        """
        plt.figure(figsize=(10, 10), facecolor='white', edgecolor='blue')
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.title(f"Word Cloud of {file}", fontsize=35)
        plt.tight_layout(pad=0)
        # Save before show(): the order of the original code is kept.
        plt.savefig(f'{self.base_path}{file}.png')
        plt.show()

    def get_topics_plot(self, df: "pd.DataFrame", name: str, *,
                        title: str = "Major Topics of HOC in 2021",
                        top_n: int = 100) -> None:
        """Plot the most frequent major headings, save the plot and show it.

        Args:
            df: dataframe with a ``major_heading`` column.
            name: image name without extension; the plot is saved as
                ``<base_path><name>.png``.
            title: figure title. Defaults to the historical 2021 title so
                existing callers are unaffected; pass an explicit title when
                plotting other data (e.g. 2020).
            top_n: number of most frequent headings to plot.
        """
        plt.figure(figsize=(8, 17))
        # value_counts() sorts by descending frequency, so head(top_n) keeps
        # the top_n most discussed headings.
        df['major_heading'].value_counts().head(top_n).plot(kind='barh')
        plt.title(title, fontsize=25)
        plt.ylabel("Major Heading", fontsize=16)
        plt.xlabel("No of Speeches", fontsize=16)
        plt.savefig(f'{self.base_path}{name}.png')
        plt.show()