## Name : Pravin Nandankar
## KNN Algorithm for House Price Prediction

In [None]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as sns  
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import StandardScaler  
from sklearn.neighbors import KNeighborsRegressor  
from sklearn.metrics import mean_squared_error, r2_score  
import warnings  
# Install a global filter that ignores every warning raised from here on.
# NOTE(review): presumably meant to keep the notebook output uncluttered, but
# it also hides deprecation and convergence warnings - confirm this is intended.
warnings.filterwarnings('ignore')

In [None]:
# Load the dataset from 'Housing_data.csv' (a path relative to the current
# working directory) into the DataFrame used by all later steps.
data = pd.read_csv('Housing_data.csv')

# Display the first few rows of the dataset as a quick sanity check
data.head()

In [None]:
# Basic information about the dataset: prints a concise summary of the
# DataFrame (columns, non-null counts, and dtypes)
data.info()

In [None]:
# Summary statistics of the dataset's columns (pandas summarises only the
# numeric columns by default)
data.describe()

In [None]:
# Count the missing (NaN/None) entries in every column
data.isna().sum()

In [None]:
# Show how the target variable ('Price') is distributed: a 30-bin histogram
# with a kernel density estimate drawn over it.
plt.figure(figsize=(10, 6))
sns.histplot(data['Price'], bins=30, kde=True).set(
    title='Distribution of House Prices',
    xlabel='Price',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Find the object-dtype (non-numeric) columns and report them
non_numeric_columns = data.select_dtypes(include=['object']).columns
print("Non-numeric columns:", non_numeric_columns)

# One-hot encode those columns so that every column can take part in the
# correlation computation; the first level of each is dropped.
data_encoded = pd.get_dummies(
    data=data,
    columns=non_numeric_columns,
    drop_first=True,
)

# Alternative when the categorical columns are not needed: drop them instead
# data_encoded = data.drop(columns=non_numeric_columns)

# Compute the pairwise correlations and draw them as an annotated heatmap
plt.figure(figsize=(12, 8))
correlation_matrix = data_encoded.corr()
sns.heatmap(
    data=correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
).set_title('Correlation Matrix')
plt.show()

In [None]:
# Prepare the data for KNN.
# 'Price' is the target variable; every other column is used as a feature.
y = data['Price']
X = data.drop(columns='Price')

In [None]:
# Handle categorical variables, if any, with one-hot encoding.
# drop_first=True omits the first level of each categorical column, so that
# level is represented by all of its sibling dummy columns being 0.
X = pd.get_dummies(X, drop_first=True)  

In [None]:
# Split the dataset into training and testing sets: 20% of the rows are held
# out for testing, and random_state=42 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Standardize the features.
# The scaler learns its statistics from the training split only and the same
# transformation is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Apply KNN: build a regressor that uses the 3 nearest neighbours and fit it
# on the standardized training features.
knn = KNeighborsRegressor(n_neighbors=3)  
knn.fit(X_train, y_train)  

In [None]:
# Predict prices for the held-out test features
y_pred = knn.predict(X_test)

# Score the predictions against the true test prices
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics rounded to two decimals
print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')

In [None]:
def predict_price(input_data):
    """Predict the price of a single house with the fitted KNN model.

    Relies on three module-level objects created earlier in the file: ``X``
    (the one-hot encoded feature frame, used here only for its column
    layout), ``scaler`` (the fitted scaler) and ``knn`` (the fitted
    regressor).

    Args:
        input_data: Mapping of raw feature name to value for one house,
            keyed by the same column names as the original dataset.

    Returns:
        The predicted price, i.e. the first element of ``knn.predict``'s
        output for the single input row.
    """
    # Build a one-row DataFrame with one column per supplied feature.
    input_df = pd.DataFrame([input_data])

    # One-hot encode the categorical values. drop_first must NOT be used
    # here: a single row has exactly one level per categorical column, so
    # drop_first would remove that only dummy column and every input would
    # be encoded as the baseline category regardless of its actual value.
    input_df_encoded = pd.get_dummies(input_df)

    # Align with the training layout. Dummy columns that training never saw
    # (including the baseline level dropped by drop_first during training)
    # are discarded, and training columns absent from the input are added
    # with value 0. NOTE: this also means any numeric feature missing from
    # input_data is silently treated as 0.
    input_df_encoded = input_df_encoded.reindex(columns=X.columns, fill_value=0)

    # Apply the same standardization that was fitted on the training data.
    input_scaled = scaler.transform(input_df_encoded)

    # predict() returns one value per row; there is exactly one row.
    predicted_price = knn.predict(input_scaled)

    return predicted_price[0]

In [None]:
# Example input data for prediction.
# Keys must match the raw column names of the dataset; predict_price fills
# any training feature that is not listed here with 0.
user_input = {
    'Location': 'Chennai',  # Replace with actual location
    'Bedrooms': 3,          # Replace with actual number of bedrooms
    'Bathrooms': 2,         # Replace with actual number of bathrooms
    'Area': 1200,           # Replace with actual area in square feet
    # Add other necessary features here
}

# Predict the price and print it with the rupee sign (the original literal
# contained the mis-decoded byte sequence of that character).
predicted_price = predict_price(user_input)
print(f"Predicted house price: ₹{predicted_price:.2f}")