#### Analysis of Delinquency Cases' Remarks

*In this analysis, I examine the remarks collected from various delinquency cases to identify potential reasons contributing to the issues. The remarks provide insight into what might have gone wrong in these cases, allowing me to draw meaningful conclusions, if any can be drawn.*

#### Data Sensitivity

*Because the data used for this analysis contains sensitive information related to delinquency cases, the chart and data outputs have been omitted from this notebook to ensure data privacy and confidentiality.*


In [6]:
import pandas as pd

In [12]:
import warnings

# Silence one specific warning by its message text so it does not clutter the
# cell output; all other warnings are still shown.
# NOTE(review): presumably emitted while reading the Excel workbook below — confirm.
warnings.filterwarnings("ignore", message="Unknown extension is not supported and will be removed")


In [13]:
# Load the workbook into a DataFrame.
# NOTE(review): 'xxx.xlsx' looks like a redacted placeholder path — replace with the real file.
data = pd.read_excel('xxx.xlsx')

In [None]:
# Display the loaded DataFrame for a quick visual check.
print(data)

In [15]:
# Pull out the 'Remarks' column as a Series; it is the only column used below.
remarks = data['Remarks']

In [16]:
# Wrap the Series in a single-column DataFrame (column name stays 'Remarks').
remarks_df = pd.DataFrame(remarks)

In [18]:
# Discard rows with no remark so only real text reaches the word count.
remarks_df = remarks_df.dropna(subset=['Remarks'])

In [None]:
# Display the cleaned remarks for a quick visual check.
print(remarks_df)

In [20]:
from collections import Counter
import re

In [None]:
def words_frequency(dataframe, column_name, *, verbose=True):
    """Count how often each word occurs in a text column.

    All values of ``dataframe[column_name]`` are joined into one string,
    lower-cased, and split into ``\\w+`` tokens, which are then counted.

    Args:
        dataframe: pandas DataFrame holding the text column.
        column_name: Name of the column to analyse.
        verbose: When true (the default), print the joined text, the token
            list, the Counter and the sorted result. Pass ``verbose=False``
            to keep the raw remarks out of the output.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent; an empty list when the column has no text.
    """
    # str.join() raises TypeError on anything that is not a str, and a column
    # read from a spreadsheet can contain blanks (NaN/None) or numbers. Drop
    # the missing values and coerce the rest to text before joining.
    remarks = dataframe[column_name].dropna().astype(str)
    all_remarks_text = ' '.join(remarks)

    words = re.findall(r'\w+', all_remarks_text.lower())
    word_counts = Counter(words)
    most_common_words = word_counts.most_common()

    if verbose:
        print(all_remarks_text)
        print(words)
        print(word_counts)
        print(most_common_words)

    return most_common_words

# Build the (word, count) list for the 'Remarks' column.
most_common_words = words_frequency(remarks_df, 'Remarks')

# Preview only the 50 most frequent entries.
print(most_common_words[:50])

In [None]:
# Display the complete (word, count) list, most frequent first.
print(most_common_words)

In [36]:
import matplotlib.pyplot as plt

In [None]:
def plot_most_common_words(dataframe, column_name, words_to_plot):
    """Show an annotated bar chart of how often selected words occur.

    Word counts are computed from ``dataframe[column_name]`` using
    lower-cased ``\\w+`` tokens. Only words listed in ``words_to_plot`` are
    drawn, ordered from most to least frequent; words that never occur are
    left out, and duplicates in ``words_to_plot`` are drawn once.

    Args:
        dataframe: pandas DataFrame holding the text column.
        column_name: Name of the column to analyse.
        words_to_plot: Iterable of words to include in the chart. Matching
            is case-insensitive.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Derive the counts from the arguments rather than from notebook-global
    # state, so the chart always reflects the data that was passed in.
    remarks = dataframe[column_name].dropna().astype(str)
    tokens = re.findall(r'\w+', ' '.join(remarks).lower())

    # Tokens are lower-cased above, so normalise the requested words the same
    # way; a set gives constant-time membership tests.
    wanted = {str(word).lower() for word in words_to_plot}

    ranked = [
        (word, count)
        for word, count in Counter(tokens).most_common()
        if word in wanted
    ]
    selected_words = [word for word, _ in ranked]
    selected_counts = [count for _, count in ranked]

    # bar plot
    plt.figure(figsize=(10, 6))
    bars = plt.bar(selected_words, selected_counts)
    plt.xlabel('Words')
    plt.ylabel('Counts')
    plt.title('Most Common Words')
    plt.xticks(rotation=45)

    # Annotating the bars with their counts
    for bar in bars:
        height = bar.get_height()
        plt.annotate(f'{height}',
                     xy=(bar.get_x() + bar.get_width() / 2, height),
                     xytext=(0, 3),  # 3 points vertical offset
                     textcoords="offset points",
                     ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

# NOTE(review): the three identical 'word' entries look like placeholders —
# replace them with the actual words to chart.
words_to_plot = ['word','word','word']  # Words to include in the bar chart
plot_most_common_words(remarks_df, 'Remarks', words_to_plot)
