In [1]:
import pandas as pd

# Load the final dataset
df = pd.read_csv("feature_engineered_data.csv")

# Basic dataset summary
summary = {
    "Total Users": df.shape[0],
    "Total Features": df.shape[1],
    "Gender Breakdown": df["Gender_Male"].value_counts().to_dict() if "Gender_Male" in df.columns else "N/A",
    "Average Age": round(df["Age"].mean(), 2),
    "Average Daily Usage (Minutes)": round(df["Daily_Usage_Time"].mean(), 2),
}

# Convert summary to DataFrame and save
summary_df = pd.DataFrame(list(summary.items()), columns=["Metric", "Value"])
summary_df.to_csv("final_summary.csv", index=False)

# Display summary
summary_df


Unnamed: 0,Metric,Value
0,Total Users,500.0
1,Total Features,28.0
2,Gender Breakdown,
3,Average Age,0.51
4,Average Daily Usage (Minutes),0.41


In [2]:
final_readme = """
# GenZ Dating App Analysis - Final Report

## 📊 Project Overview
This project analyzed dating app usage among Gen-Z, focusing on:
- Gender-based preferences
- Age-related usage trends
- Regional and temporal behavior
- Feature engineering for predictive modeling

## 📊 Key Insights
- The most active users are between **{0} and {1} years old**.
- Users spend an average of **{2} minutes daily** on dating apps.
- **Metro users** tend to be more active compared to rural users.
- Many users use **multiple dating apps simultaneously**.

## 📂 Files Included
- `feature_engineered_data.csv` - Final dataset
- `final_summary.csv` - Summary of key metrics
- `final_summary.ipynb` - Notebook with insights

## 📌 Next Steps
- Apply machine learning models to predict user behavior.
- Improve data collection for underrepresented demographics.
""".format(df["Age"].min(), df["Age"].max(), round(df["Daily_Usage_Time"].mean(), 2))

# Save README file
with open("README.md", "w", encoding="utf-8") as file:
    file.write(final_readme)

print("README.md updated successfully!")


README.md updated successfully!
