In [3]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate
from itertools import combinations
import os
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Load data from Excel file
file_path = "SurveyData.xlsx"  # Update with actual path
xls = pd.ExcelFile(file_path)
first_sheet_name = xls.sheet_names[0]  # Get the first sheet name
survey_main = pd.read_excel(xls, sheet_name=first_sheet_name)

# Cleaning column names
def clean_column_names(df):
    df.columns = df.columns.str.lower().str.replace(" ", "_")
    return df

survey_main = clean_column_names(survey_main)

# Create output directory if not exists
output_dir = "media"
os.makedirs(output_dir, exist_ok=True)

# Display Summary Statistics
print("\nSummary Statistics:")
print(tabulate(survey_main.describe(), headers='keys', tablefmt='pretty'))

# Frequency Tables for Categorical Variables
categorical_vars = ["gender", "marital_status", "education", "religion", "subcaste", 
                    "income_sources", "bpl_status", "ration_card", "loan_status"]

for var in categorical_vars:
    print(f"\nFrequency Table for {var}:")
    freq_table = survey_main[var].value_counts(dropna=False).reset_index()
    freq_table.columns = [var, "Count"]
    print(tabulate(freq_table, headers='keys', tablefmt='pretty'))
    
    # Bar plot visualization
    plt.figure(figsize=(8, 4))
    sns.barplot(x=freq_table[var], y=freq_table["Count"], palette="viridis")
    plt.xticks(rotation=45)
    plt.title(f"Frequency of {var}")
    plt.xlabel(var)
    plt.ylabel("Count")
    plt.savefig(os.path.join(output_dir, f"Frequency_of_{var}.jpg"))
    plt.close()




Summary Statistics:
+-------+--------------------+----------------------+----------------------------+-----------------------+--------------------+-------------------------+---------------------------+------------------------------+------------------------+----------------------+-------------------+--------------------+---------------------+---------------------+---------------------+------------------------+-------------------------+-------------------------+-------------------+------------------------------+-------------------+--------------------+---------------------+---------------------+--------------------+--------------------+--------------------+--------------------+---------------------+--------------------+---------------------+--------------------+--------+--------------------+-------------+---------------------+------------+---------------------+---------------------+----------------------+--------------------+--------------------+----------------------+------------------


Frequency Table for marital_status:
+---+----------------+-------+
|   | marital_status | Count |
+---+----------------+-------+
| 0 |    married     |  261  |
| 1 |   unmarried    |  34   |
| 2 |       _        |   5   |
| 3 |    divorced    |   3   |
+---+----------------+-------+

Frequency Table for education:
+---+------------------+-------+
|   |    education     | Count |
+---+------------------+-------+
| 0 |     primary      |  98   |
| 1 |    secondary     |  96   |
| 2 | higher_secondary |  48   |
| 3 |    illiterate    |  44   |
| 4 |     graduate     |  17   |
+---+------------------+-------+

Frequency Table for religion:
+----+----------+-------+
|    | religion | Count |
+----+----------+-------+
| 0  |    हिंदू    |  266  |
| 1  |    हिंदु    |  24   |
| 2  |   बौद्ध    |   4   |
| 3  |   हिन्दु    |   1   |
| 4  |    हिंदुं    |   1   |
| 5  |  Hindu   |   1   |
| 6  |    हिंदी    |   1   |
| 7  |    हिदू    |   1   |
| 8  | हिंदू , कुणबी |   1   |
| 9  |   मुस्लिम  

In [4]:
# Generate Pivot Tables for All Pairs of Categorical Variables
combos = list(combinations(categorical_vars, 2))
for var1, var2 in combos:
    print(f"\nPivot Table for {var1} vs {var2}:")
    pivot_table = pd.crosstab(survey_main[var1], survey_main[var2])
    print(tabulate(pivot_table, headers='keys', tablefmt='pretty'))
    
    # Heatmap visualization
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table, annot=True, fmt="d", cmap="coolwarm", linewidths=0.5)
    plt.title(f"Heatmap of {var1} vs {var2}")
    plt.xlabel(var2)
    plt.ylabel(var1)
    plt.savefig(os.path.join(output_dir, f"Heatmap_of_{var1}_vs_{var2}.jpg"))
    plt.close()



Pivot Table for gender vs marital_status:
+--------+---+----------+---------+-----------+
| gender | _ | divorced | married | unmarried |
+--------+---+----------+---------+-----------+
| female | 1 |    0     |    4    |     0     |
|  male  | 4 |    3     |   257   |    34     |
+--------+---+----------+---------+-----------+

Pivot Table for gender vs education:
+--------+----------+------------------+------------+---------+-----------+
| gender | graduate | higher_secondary | illiterate | primary | secondary |
+--------+----------+------------------+------------+---------+-----------+
| female |    0     |        0         |     0      |    3    |     2     |
|  male  |    17    |        48        |     44     |   95    |    94     |
+--------+----------+------------------+------------+---------+-----------+

Pivot Table for gender vs religion:
+--------+-------+-----+------+----+----+----+-----+----------+--------+-------+----+-----+
| gender | Hindu | बौद्ध | मुस्लिम | हिंदी | ह


Pivot Table for gender vs bpl_status:
+--------+-----+-----+
| gender | no  | yes |
+--------+-----+-----+
| female |  3  |  2  |
|  male  | 133 | 165 |
+--------+-----+-----+

Pivot Table for gender vs ration_card:
+--------+------+--------+-------+--------+
| gender | none | orange | white | yellow |
+--------+------+--------+-------+--------+
| female |  0   |   4    |   0   |   1    |
|  male  |  5   |  151   |   6   |  136   |
+--------+------+--------+-------+--------+

Pivot Table for gender vs loan_status:
+--------+----+-----+
| gender | no | yes |
+--------+----+-----+
| female | 0  |  5  |
|  male  | 36 | 262 |
+--------+----+-----+

Pivot Table for marital_status vs education:
+----------------+----------+------------------+------------+---------+-----------+
| marital_status | graduate | higher_secondary | illiterate | primary | secondary |
+----------------+----------+------------------+------------+---------+-----------+
|       _        |    0     |        1         | 


Pivot Table for marital_status vs income_sources:
+----------------+-------+------------+-------------+-------------------+------------------------+--------------------------------------+----------------------+------------------------------------+------------------------------+--------------------+----------------------------------+--------------------------+----------------------------+-------------------------------+---------------------------------------------+-------------------------------------+-----------------------------+--------+--------------+
| marital_status | Other | Privatejob | agriculture | agriculture Other | agriculture Privatejob | agriculture Privatejob GovernmentJob | agriculture business | agriculture business GovernmentJob | agriculture business Pension | agriculture labour | agriculture labour GovernmentJob | agriculture labour Other | agriculture labour Pension | agriculture labour Privatejob | agriculture labour Privatejob GovernmentJob | agriculture labour 


Pivot Table for education vs income_sources:
+------------------+-------+------------+-------------+-------------------+------------------------+--------------------------------------+----------------------+------------------------------------+------------------------------+--------------------+----------------------------------+--------------------------+----------------------------+-------------------------------+---------------------------------------------+-------------------------------------+-----------------------------+--------+--------------+
|    education     | Other | Privatejob | agriculture | agriculture Other | agriculture Privatejob | agriculture Privatejob GovernmentJob | agriculture business | agriculture business GovernmentJob | agriculture business Pension | agriculture labour | agriculture labour GovernmentJob | agriculture labour Other | agriculture labour Pension | agriculture labour Privatejob | agriculture labour Privatejob GovernmentJob | agriculture labour P


Pivot Table for religion vs income_sources:
+----------+-------+------------+-------------+-------------------+------------------------+--------------------------------------+----------------------+------------------------------------+------------------------------+--------------------+----------------------------------+--------------------------+----------------------------+-------------------------------+---------------------------------------------+-------------------------------------+-----------------------------+--------+--------------+
| religion | Other | Privatejob | agriculture | agriculture Other | agriculture Privatejob | agriculture Privatejob GovernmentJob | agriculture business | agriculture business GovernmentJob | agriculture business Pension | agriculture labour | agriculture labour GovernmentJob | agriculture labour Other | agriculture labour Pension | agriculture labour Privatejob | agriculture labour Privatejob GovernmentJob | agriculture labour Privatejob Other |


Pivot Table for religion vs loan_status:
+----------+----+-----+
| religion | no | yes |
+----------+----+-----+
|  Hindu   | 0  |  1  |
|   बौद्ध    | 0  |  4  |
|   मुस्लिम   | 0  |  1  |
|    हिंदी    | 0  |  1  |
|    हिंदु    | 3  | 21  |
|    हिंदुं    | 0  |  1  |
|    हिंदू    | 33 | 233 |
| हिंदू , कुणबी | 0  |  1  |
|  हिंदू कुणबी  | 0  |  1  |
|  हिंदू गोर   | 0  |  1  |
|    हिदू    | 0  |  1  |
|   हिन्दु    | 0  |  1  |
+----------+----+-----+

Pivot Table for subcaste vs income_sources:
+---------------+-------+------------+-------------+-------------------+------------------------+--------------------------------------+----------------------+------------------------------------+------------------------------+--------------------+----------------------------------+--------------------------+----------------------------+-------------------------------+---------------------------------------------+-------------------------------------+-----------------------------+-------


Pivot Table for subcaste vs bpl_status:
+---------------+----+-----+
|   subcaste    | no | yes |
+---------------+----+-----+
|       -       | 1  |  0  |
|  Dhangar NTC  | 0  |  1  |
|    General    | 1  |  0  |
|     Kunbi     | 0  |  1  |
|   Kunbi 83    | 0  |  1  |
|      N T      | 0  |  1  |
|      NT       | 5  |  1  |
|    NT (A)     | 1  |  0  |
|    NT (D)     | 0  |  1  |
|     NT C      | 1  |  1  |
|     NT-C      | 2  |  1  |
|     NT.C      | 1  |  0  |
|     NT/C      | 1  |  0  |
|      NTC      | 1  |  1  |
|      Na       | 1  |  0  |
|      Nil      | 1  |  0  |
|    O. B. C    | 1  |  0  |
|      OBC      | 47 | 55  |
|     OPEN      | 4  |  4  |
|      Obc      | 4  |  0  |
|     Open      | 6  |  0  |
| Rajput Bhamta | 0  |  1  |
|      S T      | 1  |  0  |
|      SBC      | 2  |  2  |
|      SC       | 3  |  5  |
|      ST       | 0  |  7  |
|      Sc       | 0  |  1  |
|     VG NT     | 1  |  0  |
|     VJ NT     | 1  |  0  |
|     VJ-NT     | 2  |  0  |
| 


Pivot Table for income_sources vs bpl_status:
+---------------------------------------------+----+-----+
|               income_sources                | no | yes |
+---------------------------------------------+----+-----+
|                    Other                    | 1  |  0  |
|                 Privatejob                  | 0  |  1  |
|                 agriculture                 | 31 | 29  |
|              agriculture Other              | 0  |  2  |
|           agriculture Privatejob            | 3  |  5  |
|    agriculture Privatejob GovernmentJob     | 0  |  1  |
|            agriculture business             | 1  |  0  |
|     agriculture business GovernmentJob      | 1  |  0  |
|        agriculture business Pension         | 0  |  1  |
|             agriculture labour              | 86 | 109 |
|      agriculture labour GovernmentJob       | 1  |  0  |
|          agriculture labour Other           | 0  |  2  |
|         agriculture labour Pension          | 1  |  0  |
|        