# **Train Prediction Model**

In [None]:
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.13.7
#   kernelspec:
#     display_name: Python 3
#     name: python3
# ---

# # Bangalore House Price Prediction - Model Training Notebook

# In this notebook we load the cleaned data, train a machine-learning model on it, and extract the model's parameters (coefficients) for use in the web app.

# ## Step 1: Import the Required Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# ## Step 2: Load the Cleaned Data
# Load the CSV file that the 'data_cleaning.py' script is expected to have produced.
cleaned_data_path = 'data/cleaned_bengaluru_house_data.csv'

try:
    # Keep only the call that can raise FileNotFoundError inside the try body.
    df = pd.read_csv(cleaned_data_path)
except FileNotFoundError:
    print(f"ERROR: File '{cleaned_data_path}' nahi mili.")
    print("Pehle 'data_cleaning.py' script ko chalayein taaki yeh file ban sake.")
    # Every later step reads `df`. Without re-raising, execution would continue
    # and fail on the next step with an unrelated NameError, hiding the real cause.
    raise
else:
    print(f"File '{cleaned_data_path}' successfully loaded!")
    print("Cleaned Data ki shuruaati 5 rows:")
    print(df.head())

# ## Step 3: Prepare the Data for the Model (Preprocessing)

# ### 3a. One-Hot Encoding
# The text-valued 'location' column cannot be fed to the regression directly,
# so it is expanded into one indicator (0/1) column per distinct location.
# NOTE(review): every location column is kept (none is dropped), so together
# with the model's intercept these columns are linearly dependent. Confirm that
# this is intended for how the web app consumes the exported coefficients.
dummies = pd.get_dummies(df['location'])

# ### 3b. Join the DataFrames
# Append the indicator columns to the right of the original columns.
df2 = pd.concat([df, dummies], axis=1)

# ### 3c. Drop the Original 'location' Column
# The indicator columns now carry the location information, so the text
# column is no longer needed.
df3 = df2.drop(columns='location')
print("\nData preprocessing ke baad (Top 5 rows):", df3.head(), sep="\n")

# ## Step 4: Separate Features (X) and Target (y)
# y is the column to be predicted ('price').
# X is every remaining column; these are the inputs the prediction is based on.
y = df3['price']
X = df3.drop(columns='price')

# ## Step 5: Train the Model

# ### 5a. Split the Data into Training and Testing Sets
# 20% of the rows are held out for testing; random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

# ### 5b. Create and Train the Linear Regression Model
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# ### 5c. Check the Model's Performance
# .score() returns the R-squared value on the held-out test set. 1.0 is a
# perfect fit; the value can be negative when the model does worse than
# always predicting the mean.
score = model.score(X_test, y_test)
print("\nModel successfully train ho gaya hai!")
print(f"Model ka testing score (R-squared): {score * 100:.2f}%")

# ## Step 6: Extract the Model Parameters for the Web App

# These are the numbers that get copied into the JavaScript code of the web
# app (index.html).
print("\n--- JavaScript mein use karne ke liye Model Parameters ---")
print(f"\nIntercept (Base Price): {model.intercept_}")

# One weight (coefficient) per feature column, paired with the column name.
coefficients = pd.DataFrame(dict(feature=X.columns, coefficient=model.coef_))
# to_string() renders every row instead of pandas' truncated default display.
print("\nCoefficients (har feature ke liye weight):", coefficients.to_string(), sep="\n")
print("\nYeh values copy karke HTML file ke JavaScript section mein paste karein.")


In [None]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
# import io

# # -- Load the cleaned data --
# # A small inline sample is used here. In the real app, load the data from the file instead.
# # df = pd.read_csv('cleaned_bengaluru_house_data.csv')
# cleaned_csv_data = """location,total_sqft,bath,price,bhk
# Electronic City Phase II,1056.0,2.0,39.07,2
# Chikka Tirupathi,2600.0,5.0,120.0,4
# Uttarahalli,1440.0,2.0,62.0,3
# Lingadheeranahalli,1521.0,3.0,95.0,3
# Kothanur,1200.0,2.0,51.0,2
# Whitefield,1170.0,2.0,38.0,2
# Marathahalli,1310.0,3.0,63.25,3
# Gottigere,1258.5,2.0,40.0,2
# Binny Pete,1755.0,3.0,100.0,3
# Thanisandra,1000.0,2.0,43.0,2
# """
# df = pd.read_csv(io.StringIO(cleaned_csv_data))

# # -- Data ko Model ke liye Taiyar Karna --

# # 1. One-Hot Encoding: 'location' column ko numerical banate hain.
# # get_dummies har location ke liye ek naya column bana dega (0 ya 1).
# dummies = pd.get_dummies(df.location)
# df2 = pd.concat([df, dummies.drop('Thanisandra', axis='columns')], axis='columns') # Ek column drop karte hain (dummy variable trap)

# # Ab 'location' column ki zaroorat nahi hai
# df3 = df2.drop('location', axis='columns')

# # 2. Features (X) aur Target (y) ko alag karna
# X = df3.drop('price', axis='columns') # price ke alawa sab kuch X hai
# y = df3.price # price hamara target (y) hai

# # 3. Model Train Karna
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
# model = LinearRegression()
# model.fit(X_train, y_train)

# print(f"Model training score: {model.score(X_test, y_test) * 100:.2f}%")

# # -- Model ke "Magic Numbers" (Coefficients) ko nikalna --
# # Inhi numbers ko hum JavaScript mein use karenge
# print("\n--- JavaScript mein use karne ke liye Model Parameters ---")
# print(f"Intercept (Base Price): {model.intercept_}")
# # Har column ke liye ek weight (coefficient)
# coefficients = pd.DataFrame(model.coef_, X.columns, columns=['Coefficient'])
# print("\nCoefficients (Weights for each feature):")
# print(coefficients)
# print("\nYeh values copy karke neeche diye gaye HTML file ke JavaScript section mein paste karein.")
