<a href="https://colab.research.google.com/github/martinpius/Practical_1/blob/main/Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fpdf # Installing if not available

In [None]:
#from google.colab import files

In [None]:
# Import executable module
#files.upload()

In [None]:
# from google.colab import drive
# import os
# # Load your Google Drive
# drive.mount("/content/drive/", force_remount = True)

In [None]:
# os.chdir("/content/drive/Othercomputers/My MacBook Pro")

In [None]:
import pandas as pd
import numpy as np
import json # Saving .json--> rich text fmt
from ipywidgets import interact # To create interactive plots
import plotly.express as px # To create interactive plots
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF # report creation

In [None]:
# Raw GitHub location of the Boston housing CSV; default source for
# load_housing_data and the value later assigned to DATA_PATH.
url = "https://raw.githubusercontent.com/martinpius/PG_training/refs/heads/main/boston_housing.csv"

In [None]:
## 1. Reusable Functions for Common Tasks

def load_housing_data(file_path=url):
    """
    Load the housing CSV and attach a categorical age-band column.

    Parameters
    ----------
    file_path : str or path-like, default ``url``
        Any location ``pandas.read_csv`` accepts (local path or URL).

    Returns
    -------
    pandas.DataFrame
        The loaded frame with an extra ``AGECategory`` column that bins
        ``AGE`` into [0, 35], (35, 65] and (65, inf), labelled
        'teen', 'Adult' and 'Senior' respectively.

    Raises
    ------
    KeyError
        If the file has no ``AGE`` column.

    Notes
    -----
    Displays the first three raw rows as a side effect; ``display`` is the
    IPython built-in available inside a notebook kernel.
    """
    df = pd.read_csv(file_path)
    display(df.head(3))  # quick visual sanity check of the raw file

    # include_lowest=True closes the first interval on the left, so AGE == 0
    # lands in the first band instead of silently becoming NaN.
    df['AGECategory'] = pd.cut(
        df['AGE'],
        bins=[0, 35, 65, float("inf")],
        labels=['teen', 'Adult', 'Senior'],
        include_lowest=True,
    )
    return df

In [None]:
# Load the housing data from the default URL; the returned frame carries the
# extra AGECategory column added by load_housing_data.
df = load_housing_data()

In [None]:
# Preview the first three rows of the loaded frame.
display(df.head(3))

In [None]:
#df.describe().to_dict()

In [None]:
def save_results(results_dict, output_path='files_res.json'):
    """
    Serialise analysis outputs to a JSON file.

    Parameters
    ----------
    results_dict : dict
        JSON-serialisable mapping of results.
    output_path : str or path-like, default 'files_res.json'
        Destination file. The default keeps the previously hard-coded name,
        resolved against the current working directory.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``results_dict`` contains a value the ``json`` module cannot encode.
    OSError
        If ``output_path`` cannot be opened for writing.
    """
    # Explicit encoding so the output does not depend on the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results_dict, f)

In [None]:
# Source of the housing CSV read by run_full_analysis and the batch-processing cell.
DATA_PATH = url
# NOTE(review): not referenced by any other cell visible in this notebook;
# '/content' looks like the Colab working directory - confirm before relying on it.
OUTPUT_DIR = '/content'

In [None]:
def run_full_analysis():
    """
    Run the end-to-end workflow: load, summarise, correlate, persist.

    Loads the data from ``DATA_PATH`` via ``load_housing_data``, computes
    descriptive statistics and the correlation matrix of the numeric
    columns, and writes both to JSON through ``save_results``.

    Returns
    -------
    tuple of (dict, dict)
        ``(summary_stats, corr_matrix)``, each a nested
        ``{column: {row_label: value}}`` mapping as produced by
        ``DataFrame.to_dict()``.
    """
    df = load_housing_data(DATA_PATH)

    # Select numeric columns by dtype instead of dropping "AGECategory" by
    # name in place: this does not break if the loader stops adding that
    # column, and corr() no longer fails on any other non-numeric column.
    numeric_df = df.select_dtypes(include='number')

    # Perform analysis
    summary_stats = numeric_df.describe().to_dict()
    corr_matrix = numeric_df.corr().to_dict()

    # Save outputs
    save_results({'stats': summary_stats, 'correlations': corr_matrix})

    return summary_stats, corr_matrix

In [None]:
sumsta, cor = run_full_analysis() # Run the workflow once; keep the summary-statistics and correlation dicts

In [None]:
# Inspect the correlation result: a nested {column: {column: value}} dict
# returned by run_full_analysis.
cor

In [None]:
# Two candidate sets of bin edges; the same lists appear under 'income_bins'
# in the batch-processing parameter grid.
income_bins = [[0, 3, 6, 9, 15], [0, 5, 10, 15]]

In [None]:
# Walk the nested list and print each bin edge on its own line.
for edges in income_bins:
    for edge in edges:
        print(edge)

In [None]:
# Batch processing: evaluate every (bins, threshold) combination in one pass.
df = pd.read_csv(DATA_PATH)

# Parameter combinations to sweep
param_grid = {
    'income_bins': [[0, 3, 6, 9, 15], [0, 5, 10, 15]],
    'age_thresholds': [40, 50, 60]
}

results = []

for bins in param_grid['income_bins']:
    # The binning depends only on `bins`, so compute it once per bin set
    # rather than once per (bins, threshold) pair.
    df['IncomeGroup'] = pd.cut(df['MEDV'], bins=bins)

    for threshold in param_grid['age_thresholds']:
        # Flag rows whose AGE exceeds the current threshold
        df['OldHouse'] = np.where(df['AGE'] > threshold, 1, 0)

        # Store configuration and result.
        # NOTE(review): IncomeGroup does not enter this metric, so rows that
        # differ only in `bins` carry identical old_houses_avg values -
        # confirm whether a per-group statistic was intended.
        results.append({
            'bins': bins,
            'threshold': threshold,
            'old_houses_avg': df['OldHouse'].mean()
        })

pd.DataFrame(results).to_csv('results.csv', index=False)

In [None]:
#df33 = pd.DataFrame({"a": [2,2], "b": [3,2]})

In [None]:
#df33.to_csv("myout.csv")

In [None]:
# Automate report
# Generate PDF report with results
def create_report(summary_stats,
                  output_file='report.pdf',
                  column='MEDV'):
    """
    Render the summary statistics of one column as a one-page PDF.

    Parameters
    ----------
    summary_stats : dict
        Nested ``{column: {statistic: value}}`` mapping, e.g. the result of
        ``DataFrame.describe().to_dict()``.
    output_file : str, default 'report.pdf'
        Path of the PDF to write.
    column : str, default 'MEDV'
        Key of ``summary_stats`` whose statistics are listed in the report.

    Raises
    ------
    KeyError
        If ``column`` is not a key of ``summary_stats``.
    """
    # Validate before touching the PDF so a bad key never leaves a
    # half-built report behind.
    if column not in summary_stats:
        raise KeyError(
            f"{column} not found in summary_stats; "
            f"available keys: {sorted(map(str, summary_stats))}"
        )

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt="Boston Housing Analysis Report", ln=1, align='C')
    # Subtitle names the reported column (replaces leftover placeholder text).
    pdf.cell(200, 10, txt=f"Summary statistics for {column}", ln=1, align='C')

    # Add summary table
    pdf.cell(200, 10, txt="Key Statistics:", ln=1)
    for stat, value in summary_stats[column].items():
        try:
            line = f"{stat}: {value:.2f}"
        except (TypeError, ValueError):
            # Non-numeric statistics are written verbatim rather than
            # aborting the report.
            line = f"{stat}: {value}"
        pdf.cell(200, 10, txt=line, ln=1)

    pdf.output(output_file)

# Usage: summarise the current frame, then render the report to the default 'report.pdf'
stats = df.describe().to_dict()
create_report(summary_stats=stats)

In [None]:
# Interactive Dashboard in Python [Using plotly]

In [None]:

# Load a different housing dataset (housing.csv from the handson-ml2 repository).
# NOTE: this rebinds `df`, replacing the frame used by the cells above.
df = pd.read_csv('https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv')

In [None]:
# Preview the first three rows of the newly loaded frame.
display(df.head(3))

In [None]:
# Distinct values of the ocean_proximity column (array in order of first appearance).
Ocean_Proximity = df['ocean_proximity'].unique()

In [None]:
# Show the distinct ocean_proximity categories.
Ocean_Proximity

In [None]:
Min_Income=(0.0, 15.0, 0.5)  # same (min, max, step) tuple the dashboard uses for its income slider

# Comparing a column against the whole category array or the whole slider
# tuple raises "Lengths must match to compare". Evaluate the filter at one
# concrete setting instead: the first category and the slider's lower bound.
df[(df['ocean_proximity'] == Ocean_Proximity[0]) &
    (df['median_income'] >= Min_Income[0])]

In [None]:
# Same values housing_dashboard uses as its parameter defaults:
# the distinct ocean_proximity categories and two (min, max, step) tuples.
Ocean_Proximity = df['ocean_proximity'].unique()
Min_Income=(0.0, 15.0, 0.5)
Max_House_Age=(0, 100, 5)

In [None]:
# The specs above are collections (category array, (min, max, step) tuples);
# comparing a column against them directly raises "Lengths must match to
# compare". Evaluate the filter at one concrete setting: first category,
# lowest income bound, highest age bound.
filtered = df[
        (df['ocean_proximity'] == Ocean_Proximity[0]) &
        (df['median_income'] >= Min_Income[0]) &
        (df['housing_median_age'] <= Max_House_Age[1])
    ]

In [None]:
# Interactive dashboard
@interact
def housing_dashboard(
    Ocean_Proximity = df['ocean_proximity'].unique(),
    Min_Income=(0.0, 15.0, 0.5),
    Max_House_Age=(0, 100, 5)):
    """
    Filter the housing frame by the widget values and plot the selection.

    The parameter defaults are widget specifications consumed by
    ``interact``: the array of categories becomes a selection control and
    each ``(min, max, step)`` tuple a slider. Inside the body every
    parameter holds the single value currently chosen in its widget.

    Parameters
    ----------
    Ocean_Proximity : str
        Selected ``ocean_proximity`` category.
    Min_Income : float
        Lower bound (inclusive) on ``median_income``.
    Max_House_Age : int
        Upper bound (inclusive) on ``housing_median_age``.

    Side effects
    ------------
    For a non-empty selection, writes the figure to "Mydashboard.html",
    shows it, and displays summary statistics of the selected rows.
    """
    filtered = df[
        (df['ocean_proximity'] == Ocean_Proximity) &
        (df['median_income'] >= Min_Income) &
        (df['housing_median_age'] <= Max_House_Age)
    ]

    # Nothing to plot: skip the figure so the saved dashboard is not
    # overwritten with a blank chart and no all-NaN summary is shown.
    if filtered.empty:
        print(f"Found {len(filtered)} properties")
        return

    # Create interactive plot
    fig = px.scatter(
        filtered,
        x="median_income",
        y="median_house_value",
        color="population",
        size="total_rooms",
        hover_name="ocean_proximity"
    )
    # Save as HTML (rewritten on every widget change)
    fig.write_html("Mydashboard.html")
    fig.show()

    # Show stats
    print(f"Found {len(filtered)} properties")
    display(filtered.describe())