In [4]:
from google.colab import drive

# Expose Google Drive inside the Colab VM; later cells read the dataset
# from underneath this mount point.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [5]:
import os

# Lay out the project skeleton. exist_ok=True keeps the cell safe to re-run.
for project_dir in (
    '/content/project/models',
    '/content/project/templates',
    '/content/project/static',
):
    os.makedirs(project_dir, exist_ok=True)


In [7]:
%%writefile /content/project/models/model.py
import pickle
import re
import string

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Read the dataset CSV from the mounted Google Drive folder.
DATASET_CSV = '/content/gdrive/My Drive/Colab Notebooks/Capstone Project/sample30.csv'
data = pd.read_csv(DATASET_CSV)

# Text preprocessing function
def clean_text(text):
    """Normalise one raw text value before TF-IDF vectorisation.

    The value is lower-cased, stripped of digits and ASCII punctuation, and
    has every run of whitespace collapsed to a single space.

    Relies on the module-level ``re`` and ``string`` imports.

    Args:
        text: The raw value from the text column. Usually a ``str``; missing
            cells (``None`` or a float NaN) and other non-string scalars are
            accepted as well.

    Returns:
        The cleaned string. Missing values yield ``''`` so that a column with
        gaps can be passed through ``Series.apply`` without raising.
    """
    # NaN is the only float that is not equal to itself; pandas uses it (or
    # None) for empty cells, and neither has a .lower() method.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = str(text).lower()
    text = re.sub(r'\d+', '', text)  # Remove digits
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
    return text

data['cleaned_text'] = data['text'].apply(clean_text)
y = data['label']

# Train-Test Split
# Split the cleaned text *before* vectorising so the vectorizer's vocabulary
# and IDF weights are learned from training rows only. Fitting on the full
# corpus would leak information from the held-out rows into the features.
text_train, text_test, y_train, y_test = train_test_split(
    data['cleaned_text'], y, test_size=0.2, random_state=42
)

# TF-IDF Vectorization (fit on train, reuse the fitted vocabulary for test)
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train the Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Report performance on the held-out split so the saved model's quality is
# known; previously the test split was created but never used.
print(f"Held-out accuracy: {rf_model.score(X_test, y_test):.4f}")

# Save the model and vectorizer
# Both paths are relative, so the files land in the current working
# directory of whichever process runs this script.
with open('random_forest_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

with open('tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)


Overwriting /content/project/models/model.py


In [8]:

%%writefile /content/project/templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Recommendation System</title>
</head>
<body>
    <h1>Welcome to the Recommendation System</h1>
    <form action="/recommend" method="POST">
        <label for="user_id">Enter User ID:</label>
        <input type="text" id="user_id" name="user_id" required>
        <button type="submit">Get Recommendations</button>
    </form>

    {# `recommendations` is only supplied by the /recommend route; on the home
       page it is undefined, which Jinja treats as false, so nothing renders. #}
    {% if recommendations is defined %}
        <h2>Recommended Items</h2>
        {% if recommendations %}
            <ul>
                {% for item in recommendations %}
                    <li>{{ item }}</li>
                {% endfor %}
            </ul>
        {% else %}
            <p>No recommendations are available for this user.</p>
        {% endif %}
    {% endif %}
</body>
</html>

Writing /content/project/templates/index.html


In [10]:
!rm -rf /content/project/app.py # Remove any existing app.py, whether a file or a directory, before the next cell writes it

In [11]:

%%writefile /content/project/app.py
from flask import Flask, request, render_template
import pickle

# Create the Flask application object that the route decorators attach to.
app = Flask(__name__)


def _load_pickle(path):
    """Return the object deserialised from the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the trained artefacts once, at import time.
# The paths are relative, so they resolve against the working directory the
# app is started from.
# NOTE: unpickling can execute arbitrary code -- only load files you produced.
# NOTE(review): nothing visible in this notebook writes user_similarity.pkl;
# confirm it is generated elsewhere before starting the app.
rf_model = _load_pickle('random_forest_model.pkl')
vectorizer = _load_pickle('tfidf_vectorizer.pkl')
user_similarity_df = _load_pickle('user_similarity.pkl')

# Landing page
@app.route('/')
def home():
    """Render ``index.html``, the page holding the user-ID form."""
    landing_page = render_template('index.html')
    return landing_page

# Route for recommendations
@app.route('/recommend', methods=['POST'])
def recommend():
    """Recommend up to five items for the user ID submitted via the form.

    Reads ``user_id`` from the POSTed form, looks up that user's row in
    ``user_similarity_df``, keeps the most similar *other* users, and ranks
    the items those users rated in ``data.csv`` by mean rating. Items the
    requesting user has already rated are left out.

    Returns:
        The rendered ``index.html`` with a ``recommendations`` list of item
        ids, or a plain "not found" message when the submitted ID is not in
        the index of ``user_similarity_df``.
    """
    # Imported here because this module's header imports neither name; the
    # previous body raised NameError on `pd` at the first request.
    from html import escape

    import pandas as pd

    # Number of nearest neighbours whose ratings feed the recommendation.
    # Without a cut-off every user in the matrix counts as "similar", which
    # makes the similarity lookup irrelevant to the result.
    neighbour_count = 10

    user_id = request.form['user_id'].strip()
    if user_id not in user_similarity_df.index:
        # The ID is untrusted form input echoed into an HTML response, so it
        # must be escaped to prevent reflected XSS.
        return f"User {escape(user_id)} not found in the system."

    # Get similar users: drop the user's own entry (presumably the highest
    # score in their row), then keep the top-scoring neighbours.
    similarity_row = user_similarity_df.loc[user_id].drop(labels=[user_id], errors='ignore')
    similar_users = similarity_row.sort_values(ascending=False).head(neighbour_count).index

    # Load ratings data and collapse duplicate (user, item) pairs to their mean
    ratings = pd.read_csv('data.csv')
    ratings = ratings.groupby(['user_id', 'item_id'], as_index=False).agg({'rating': 'mean'})

    # Items the requesting user already rated are not useful recommendations.
    seen_items = ratings.loc[ratings['user_id'] == user_id, 'item_id']

    # Find items rated by similar users and rank them by mean rating
    neighbour_ratings = ratings[
        ratings['user_id'].isin(similar_users) & ~ratings['item_id'].isin(seen_items)
    ]
    recommended_items = neighbour_ratings.groupby('item_id')['rating'].mean()
    recommended_items = recommended_items.sort_values(ascending=False).head(5).index.tolist()

    # Return recommendations
    return render_template('index.html', recommendations=recommended_items)

if __name__ == '__main__':
    import os

    # Debug mode turns on the interactive Werkzeug debugger, which allows
    # arbitrary code execution from the browser. Keep it off unless the
    # developer explicitly opts in via the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '0').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)

Writing /content/project/app.py
