In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the first worksheet of the survey workbook into a DataFrame.
file_path = "SurveyData.xlsx"  # Update with actual path

# Open the workbook through a context manager so its file handle is closed as
# soon as the sheet has been read, instead of staying open for the lifetime of
# the kernel (which can also keep the file locked on some platforms).
with pd.ExcelFile(file_path) as xls:
    first_sheet_name = xls.sheet_names[0]  # Get the first sheet name
    survey_main = pd.read_excel(xls, sheet_name=first_sheet_name)  # Load first sheet

# Cleaning column names
def clean_column_names(df):
    """Normalise the column labels of ``df`` to lower-case with underscores.

    Every label is converted to ``str``, lower-cased, and has each space
    replaced by an underscore.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` are rewritten. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with its columns renamed.
    """
    # Cast to str first: on a header row that mixes text with non-text labels
    # (e.g. a numeric year column), the .str accessor would otherwise turn the
    # non-text labels into NaN.
    labels = df.columns.astype(str)
    # regex=False makes the space a literal match.
    df.columns = labels.str.lower().str.replace(" ", "_", regex=False)
    return df

# Normalise the column headers; the variable names listed below use the
# lower-case, underscore-separated form.
survey_main = clean_column_names(survey_main)

from itertools import combinations

# Characters that LaTeX treats specially in running text, mapped to their
# escaped form. Applied in a single pass via str.translate, so the output of
# one replacement is never escaped again by another.
_LATEX_TEXT_ESCAPES = str.maketrans({
    "\\": "\\textbackslash{}",
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "#": "\\#",
    "_": "\\_",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
})


def _latex_text(text):
    """Return ``text`` with LaTeX special characters escaped for use in prose."""
    return text.translate(_LATEX_TEXT_ESCAPES)


# Write the whole analysis as a standalone LaTeX document.
latex_file = "survey_analysis_output.tex"
with open(latex_file, "w", encoding="utf-8") as f:
    # LaTeX preamble
    f.write("\\documentclass{article}\n")
    f.write("\\usepackage{booktabs}\n")
    f.write("\\usepackage{longtable}\n")
    f.write("\\usepackage{caption}\n")
    f.write("\\begin{document}\n")
    f.write("\\title{Survey Data Analysis}\n")
    f.write("\\author{Automated Report}\n")
    f.write("\\date{\\today}\n")
    f.write("\\maketitle\n")

    # Summary statistics table
    f.write("\n\\section{Summary Statistics}\n")
    sum_stats = survey_main.describe()
    f.write(sum_stats.to_latex(escape=True, index=True, caption="Summary Statistics for Numerical Variables", label="tab:summary_statistics"))

    # Frequency tables for categorical variables
    categorical_vars = ["gender", "marital_status", "education", "religion", "caste",
                        "income_sources", "bpl_status", "ration_card", "loan_status"]

    for var in categorical_vars:
        # Section titles and captions are written into the document verbatim
        # (to_latex's escape= option only covers the table contents), so the
        # underscores in the variable names must be escaped here or the .tex
        # file fails to compile.
        freq_title = _latex_text(f"Frequency Table for {var}")
        f.write(f"\n\\section{{{freq_title}}}\n")
        # dropna=False keeps missing values as their own row in the table.
        freq_table = survey_main[var].value_counts(dropna=False).reset_index()
        freq_table.columns = [var, "Count"]
        # Labels are left unescaped: underscores are valid inside \label{}.
        f.write(freq_table.to_latex(index=False, escape=True, caption=freq_title, label=f"tab:{var}_freq"))

    # Cross-tabulations for every unordered pair of categorical variables
    combos = list(combinations(categorical_vars, 2))
    for var1, var2 in combos:
        pivot_title = _latex_text(f"Pivot Table for {var1} vs {var2}")
        f.write(f"\n\\section{{{pivot_title}}}\n")
        pivot_table = pd.crosstab(survey_main[var1], survey_main[var2])
        f.write(pivot_table.to_latex(index=True, escape=True, caption=pivot_title, label=f"tab:{var1}_{var2}"))

    # End document
    f.write("\\end{document}\n")

print(f"LaTeX output saved to {latex_file}")

LaTeX output saved to survey_analysis_output.tex


In [None]:
# Print frequency tables and pairwise cross-tabulations to the cell output.
from itertools import combinations

# Categorical columns to summarise (names are in the cleaned, lower-case form).
categorical_vars = ["gender", "marital_status", "education", "religion", "caste",
                    "income_sources", "bpl_status", "ration_card", "loan_status"]

for var in categorical_vars:
    print(f"\nFrequency Table for {var}:")
    # dropna=False keeps missing values as their own row in the counts.
    print(survey_main[var].value_counts(dropna=False))

# Cross-tabulate every unordered pair of categorical variables and print each
# result as a LaTeX table.
combos = list(combinations(categorical_vars, 2))
for var1, var2 in combos:
    print(f"\nPivot Table for {var1} vs {var2}:")
    pivot_table = pd.crosstab(survey_main[var1], survey_main[var2])
    # The caption is emitted verbatim by to_latex, so escape the underscores in
    # the variable names to keep the printed snippet compilable.
    caption_text = f"Pivot Table for {var1} vs {var2}".replace("_", "\\_")
    # escape=True is passed explicitly so special characters in the row/column
    # labels are escaped regardless of the installed pandas version's default.
    latex_table = pivot_table.to_latex(index=True, escape=True, caption=caption_text, label=f"tab:{var1}_{var2}")
    print(latex_table)

# Example figure: bar chart of respondent counts per gender category.
fig, ax = plt.subplots(figsize=(8, 5))
# Draw onto the explicitly created axes rather than the implicit current one.
sns.countplot(data=survey_main, x="gender", palette="Set2", ax=ax)
ax.set_title("Gender Distribution")
ax.set_xlabel("Gender")
ax.set_ylabel("Count")
plt.show()