# Exploratory Data Analysis

In [1]:
# Load necessary packages
import glob
import json
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

from utils.helper import preprocess_tweet

In [2]:
fintech_path = './data/fintech/'
fintech_folders = glob.glob(f"{fintech_path}*/", recursive=True)
fintech = [path.split('/')[-2] for path in fintech_folders]

nbfc_path = './data/nbfc/'
nbfc_folders = glob.glob(f"{nbfc_path}*/", recursive=True)
nbfc = [path.split('/')[-2] for path in nbfc_folders]

In [3]:
with open('./data/config.json') as f:
    config_data = json.load(f)

In [4]:
fintech_dropdown = widgets.Dropdown(
    options=fintech,
    value=fintech[0],
    description='Fintech company:',
)

fintech_out = widgets.Output()

def on_change_fintech(change):
    with fintech_out:
        clear_output()
        if change['type'] == 'change' and change['name'] == 'value':
            clear_output()
            comp = change['new']
            stopwords = list(STOPWORDS)
            fintech_df = pd.read_csv(f'./data/fintech/{comp}/{comp}.csv')
            text = " ".join(fintech_df['tweet'])
            text = preprocess_tweet(text)
            if comp in config_data["stopwords"]["fintech"]:
                stopwords = stopwords + config_data["stopwords"]["fintech"][comp]
            wordcloud = WordCloud(width = 1400, height = 800,
                                    background_color ='white',
                                    stopwords = stopwords,
                                    min_font_size = 10).generate(text)
            
            # plot the WordCloud image
            plt.imshow(wordcloud)
            plt.axis("off")
            plt.tight_layout(pad = 0)
            plt.show()

fintech_dropdown.observe(on_change_fintech, names='value')

display(widgets.VBox([fintech_dropdown, fintech_out]))

VBox(children=(Dropdown(description='Fintech company:', options=('lendingkart', 'mobikwik', 'faircent', 'paytm…

In [5]:
nbfc_dropdown = widgets.Dropdown(
    options=nbfc,
    value=nbfc[0],
    description='NBFC company:',
)

nbfc_out = widgets.Output()

def on_change_nbfc(change):
    with nbfc_out:
        clear_output()
        if change['type'] == 'change' and change['name'] == 'value':
            clear_output()
            comp = change['new']
            stopwords = list(STOPWORDS)
            fintech_df = pd.read_csv(f'./data/nbfc/{comp}/{comp}.csv')
            text = " ".join(fintech_df['tweet'])
            text = preprocess_tweet(text)
            if comp in config_data["stopwords"]["nbfc"]:
                stopwords = stopwords + config_data["stopwords"]["nbfc"][comp]
            wordcloud = WordCloud(width = 1400, height = 800,
                                    background_color ='white',
                                    stopwords = stopwords,
                                    min_font_size = 10).generate(text)
            
            # plot the WordCloud image
            plt.imshow(wordcloud)
            plt.axis("off")
            plt.tight_layout(pad = 0)
            plt.show()

nbfc_dropdown.observe(on_change_nbfc, names='value')

display(widgets.VBox([nbfc_dropdown, nbfc_out]))

VBox(children=(Dropdown(description='NBFC company:', options=('muthoot_finance', 'bajaj_finance', 'aditya_birl…