In [None]:
import pandas as pd
import glob
import os
import random

# Read every per-city CSV; the file name (without extension) becomes the city label.
# sorted() pins the concatenation order: glob.glob returns matches in arbitrary,
# OS-dependent order, which would otherwise let the seeded sample below pick
# different rows on different machines.
file_paths = sorted(glob.glob(r"D:\Housing Prices Prediction\All Metropolitan Cities\*.csv"))
if not file_paths:
    # Fail early with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError("No city CSV files matched the input pattern.")

dfs = []
city_names = []
for file_path in file_paths:
    city_name = os.path.splitext(os.path.basename(file_path))[0]
    df = pd.read_csv(file_path)
    dfs.append(df)
    # One label per row so the list lines up with the concatenated frame.
    city_names.extend([city_name] * len(df))

# Combine all dataframes and attach the per-row city label
combined_df = pd.concat(dfs, ignore_index=True)
combined_df["City"] = city_names

# Select up to 16,000 random rows. DataFrame.sample raises ValueError when n
# exceeds the number of rows (replace=False), so cap n at the available count.
sample_size = min(16000, len(combined_df))
random_df = combined_df.sample(n=sample_size, random_state=42)

# Replace the sampled (non-contiguous) index with a fresh 0..n-1 index
random_df = random_df.reset_index(drop=True)

# Save the randomly selected rows to a new CSV file (written to the working directory)
random_df.to_csv("random_housing_dataset.csv", index=False)


In [None]:
import csv
# Convert each CSV row (header included) into a parenthesised, comma-separated
# line of the form "(a, b, c)," and write the lines to a text file.
csv_file_path = r'D:\Housing Prices Prediction\random_housing_dataset.csv'
text_file_path = r'D:\Housing Prices Prediction\output.txt'

# newline='' is what the csv module requires for the input file so that quoted
# fields containing line breaks are parsed correctly. The encoding is pinned to
# UTF-8 (what pandas.to_csv writes by default) instead of the platform locale,
# so non-ASCII text round-trips on Windows.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file, \
        open(text_file_path, 'w', encoding='utf-8') as text_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        # csv.reader already yields strings, so the fields can be joined directly.
        row_str = ', '.join(row)
        text_file.write(f'({row_str}),\n')



In [None]:
import pandas as pd

# Read the sampled housing data back from disk
df = pd.read_csv('random_housing_dataset.csv')

# Sequential 1-based identifiers, one per row, stored in a new 'ID' column
ids = range(1, len(df) + 1)
df['ID'] = ids

# Report whether every ID value occurs exactly once
print("IDs are unique." if df['ID'].is_unique else "IDs are not unique.")

# Write the frame, now including the ID column, to a separate file
df.to_csv('updated_dataset.csv', index=False)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
import pickle
from flask import Flask, jsonify, request


# Train a random-forest price model on (City, Area) and pickle the fitted pipeline.

# Read the dataset
df = pd.read_csv("housing_dataset.csv")

# Split the data into features (X) and target variable (y)
X = df[['City', 'Area']]
y = df['Price']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# NOTE(review): X_test / y_test are not used further in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode column 0 ('City', the first column of X), dropping the first
# category; the remaining column ('Area') is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [0])],
    remainder='passthrough'
)

# Create a pipeline with preprocessing and random forest regressor
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', RandomForestRegressor(random_state=42))])

# Fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# Persist the fitted pipeline. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)
