In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate
from itertools import combinations
import os
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Load data from Excel file
file_path = "SurveyData.xlsx"  # Update with actual path
xls = pd.ExcelFile(file_path)
first_sheet_name = xls.sheet_names[0]  # Get the first sheet name
survey_main = pd.read_excel(xls, sheet_name=first_sheet_name)

# Cleaning column names
def clean_column_names(df):
    df.columns = df.columns.str.lower().str.replace(" ", "_")
    return df

survey_main = clean_column_names(survey_main)

# Create output directory if not exists
output_dir = "media"
os.makedirs(output_dir, exist_ok=True)

# Display Summary Statistics
print("\nSummary Statistics:")
print(tabulate(survey_main.describe(), headers='keys', tablefmt='pretty'))

# Frequency Tables for Categorical Variables
categorical_vars = ["gender", "marital_status", "education", "religion", "subcaste", 
                    "income_sources", "bpl_status", "ration_card", "loan_status"]

for var in categorical_vars:
    print(f"\nFrequency Table for {var}:")
    freq_table = survey_main[var].value_counts(dropna=False).reset_index()
    freq_table.columns = [var, "Count"]
    print(tabulate(freq_table, headers='keys', tablefmt='pretty'))
    
    # Bar plot visualization
    plt.figure(figsize=(8, 4))
    sns.barplot(x=freq_table[var], y=freq_table["Count"], palette="viridis")
    plt.xticks(rotation=45)
    plt.title(f"Frequency of {var}")
    plt.xlabel(var)
    plt.ylabel("Count")
    plt.savefig(os.path.join(output_dir, f"Frequency_of_{var}.jpg"))
    plt.close()




Summary Statistics:
+-------+--------------------+--------------------+--------------------+----------------------+----------------------------+-----------------------+--------------------+-------------------------+---------------------------+------------------------------+------------------------+----------------------+-------------------+--------------------+---------------------+---------------------+---------------------+------------------------+-------------------------+-------------------------+-------------------+------------------------------+-------------------+--------------------+---------------------+---------------------+--------------------+--------------------+--------------------+--------------------+---------------------+--------------------+---------------------+--------------------+--------+--------------------+-------------+---------------------+------------+---------------------+---------------------+----------------------+--------------------+--------------------


Frequency Table for religion:
+---+----------+-------+
|   | religion | Count |
+---+----------+-------+
| 0 |    हिंदू    |  298  |
| 1 |   बौद्ध    |   4   |
| 2 |   मुस्लिम   |   1   |
+---+----------+-------+

Frequency Table for subcaste:
+----+------------+-------+
|    |  subcaste  | Count |
+----+------------+-------+
| 0  |    OBC     |  178  |
| 1  |    OPEN    |  22   |
| 2  |    VJNT    |  15   |
| 3  |     ST     |  14   |
| 4  |    NTC     |  12   |
| 5  |     SC     |  12   |
| 6  |     NT     |   9   |
| 7  |   कुणबीपाटील   |   9   |
| 8  |    SBC     |   8   |
| 9  |    मराठा     |   6   |
| 10 |   Kunbi    |   4   |
| 11 |     -      |   3   |
| 12 |    nan     |   3   |
| 13 |    NTB     |   2   |
| 14 | DhangarNTC |   1   |
| 15 |  हिंदूगोरबंजारा   |   1   |
| 16 |   NT(A)    |   1   |
| 17 |   NT(D)    |   1   |
| 18 |    बंजारा     |   1   |
| 19 |  General   |   1   |
+----+------------+-------+

Frequency Table for income_sources:
+----+------------------------

In [6]:
# Generate Pivot Tables for All Pairs of Categorical Variables
combos = list(combinations(categorical_vars, 2))
for var1, var2 in combos:
    print(f"\nPivot Table for {var1} vs {var2}:")
    pivot_table = pd.crosstab(survey_main[var1], survey_main[var2])
    print(tabulate(pivot_table, headers='keys', tablefmt='pretty'))
    
    # Heatmap visualization
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table, annot=True, fmt="d", cmap="coolwarm", linewidths=0.5)
    plt.title(f"Heatmap of {var1} vs {var2}")
    plt.xlabel(var2)
    plt.ylabel(var1)
    plt.savefig(os.path.join(output_dir, f"Heatmap_of_{var1}_vs_{var2}.jpg"))
    plt.close()



Pivot Table for gender vs marital_status:
+--------+---+----------+---------+-----------+
| gender | _ | divorced | married | unmarried |
+--------+---+----------+---------+-----------+
| female | 1 |    0     |    4    |     0     |
|  male  | 4 |    3     |   257   |    34     |
+--------+---+----------+---------+-----------+

Pivot Table for gender vs education:
+--------+----------+------------------+------------+---------+-----------+
| gender | graduate | higher_secondary | illiterate | primary | secondary |
+--------+----------+------------------+------------+---------+-----------+
| female |    0     |        0         |     0      |    3    |     2     |
|  male  |    17    |        48        |     44     |   95    |    94     |
+--------+----------+------------------+------------+---------+-----------+

Pivot Table for gender vs religion:
+--------+-----+------+-----+
| gender | बौद्ध | मुस्लिम | हिंदू  |
+--------+-----+------+-----+
| female |  0  |  0   |  5  |
|  male  |


Pivot Table for marital_status vs bpl_status:
+----------------+-----+-----+
| marital_status | no  | yes |
+----------------+-----+-----+
|    divorced    |  1  |  2  |
|    married     | 121 | 144 |
|   unmarried    | 14  | 21  |
+----------------+-----+-----+

Pivot Table for marital_status vs ration_card:
+----------------+------+--------+-------+--------+
| marital_status | none | orange | white | yellow |
+----------------+------+--------+-------+--------+
|    divorced    |  0   |   1    |   0   |   2    |
|    married     |  5   |  134   |   5   |  121   |
|   unmarried    |  0   |   20   |   1   |   14   |
+----------------+------+--------+-------+--------+

Pivot Table for marital_status vs loan_status:
+----------------+----+-----+
| marital_status | no | yes |
+----------------+----+-----+
|    divorced    | 0  |  3  |
|    married     | 32 | 233 |
|   unmarried    | 4  | 31  |
+----------------+----+-----+

Pivot Table for education vs religion:
+------------------+-----+


Pivot Table for education vs loan_status:
+------------------+----+-----+
|    education     | no | yes |
+------------------+----+-----+
|     graduate     | 1  | 16  |
| higher_secondary | 5  | 43  |
|    illiterate    | 4  | 40  |
|     primary      | 18 | 80  |
|    secondary     | 8  | 88  |
+------------------+----+-----+

Pivot Table for religion vs subcaste:
+----------+---+------------+---------+-------+----+-------+-------+-----+-----+-----+------+-----+----+----+------+--------+-----+-----+---------+
| religion | - | DhangarNTC | General | Kunbi | NT | NT(A) | NT(D) | NTB | NTC | OBC | OPEN | SBC | SC | ST | VJNT | कुणबीपाटील | बंजारा | मराठा | हिंदूगोरबंजारा |
+----------+---+------------+---------+-------+----+-------+-------+-----+-----+-----+------+-----+----+----+------+--------+-----+-----+---------+
|   बौद्ध    | 0 |     0      |    0    |   0   | 0  |   0   |   0   |  0  |  0  |  0  |  0   |  0  | 4  | 0  |  0   |   0    |  0  |  0  |    0    |
|   मुस्लिम   | 0 | 


Pivot Table for subcaste vs bpl_status:
+------------+----+-----+
|  subcaste  | no | yes |
+------------+----+-----+
|     -      | 3  |  0  |
| DhangarNTC | 0  |  1  |
|  General   | 1  |  0  |
|   Kunbi    | 0  |  4  |
|     NT     | 5  |  4  |
|   NT(A)    | 1  |  0  |
|   NT(D)    | 0  |  1  |
|    NTB     | 2  |  0  |
|    NTC     | 7  |  5  |
|    OBC     | 82 | 96  |
|    OPEN    | 12 | 10  |
|    SBC     | 4  |  4  |
|     SC     | 4  |  8  |
|     ST     | 3  | 11  |
|    VJNT    | 6  |  9  |
|   कुणबीपाटील   | 4  |  5  |
|    बंजारा     | 0  |  1  |
|    मराठा     | 0  |  6  |
|  हिंदूगोरबंजारा   | 0  |  1  |
+------------+----+-----+

Pivot Table for subcaste vs ration_card:
+------------+------+--------+-------+--------+
|  subcaste  | none | orange | white | yellow |
+------------+------+--------+-------+--------+
|     -      |  0   |   3    |   0   |   0    |
| DhangarNTC |  0   |   1    |   0   |   0    |
|  General   |  0   |   1    |   0   |   0    |
|   Kunbi    | 

In [7]:
#pd.pivot_table(
#    survey_main, 
#    values='column_to_aggregate', 
#    index='column_to_group_by', 
#    columns='column_to_pivot', 
#    aggfunc='function')

In [8]:
pd.pivot_table(
    survey_main, 
    values=['farmers_name'], 
    index='taluka', 
    columns='gender', 
    aggfunc='count',fill_value= 0)

Unnamed: 0_level_0,farmers_name,farmers_name
gender,female,male
taluka,Unnamed: 1_level_2,Unnamed: 2_level_2
अमळनेर,0,8
एरंडोल,0,16
चाळीसगाव,0,27
चोपडा,0,22
जळगाव,1,24
जामनेर,1,60
धरणगाव,1,31
पाचोरा,1,14
पारोळा,1,39
बोदवड,0,10


In [9]:
pd.pivot_table(
    survey_main, 
    values=['age'], 
    index='taluka', 
    columns='marital_status', 
    aggfunc='mean',fill_value= 0)

Unnamed: 0_level_0,age,age,age,age
marital_status,_,divorced,married,unmarried
taluka,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
अमळनेर,0.0,0.0,55.0,0.0
एरंडोल,0.0,30.0,52.076923,24.0
चाळीसगाव,59.0,0.0,45.208333,27.5
चोपडा,0.0,35.0,45.777778,27.0
जळगाव,0.0,0.0,48.416667,27.0
जामनेर,38.0,0.0,42.4,30.6
धरणगाव,0.0,0.0,51.107143,29.75
पाचोरा,0.0,0.0,41.538462,29.0
पारोळा,43.5,37.0,45.636364,27.0
बोदवड,65.0,0.0,50.777778,0.0


In [20]:
survey_main.depression_1

0       often
1      rarely
2      rarely
3      rarely
4      rarely
        ...  
298     never
299    rarely
300    always
301    rarely
302    rarely
Name: depression_1, Length: 303, dtype: object