# Collaborative Filtering

## Loading and Preprocessing data

In [None]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357251 sha256=7d4d84a0186295111e307c0dfcecba6fd5aa02e99bf531353794c09c89e261c5
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Install

In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
from sklearn.preprocessing import StandardScaler

# Load the transaction data
data = pd.read_csv('/content/data.csv', encoding='ISO-8859-1')

# Inspect the data
print(data.head())
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" to the output).
data.info()

# Handle missing values: report null counts before and after dropping incomplete rows
print(data.isnull().sum())
data = data.dropna()
print(data.isnull().sum())

# Keep only rows with strictly positive quantity and unit price.
# .copy() gives an independent frame so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
data = data[(data['Quantity'] > 0) & (data['UnitPrice'] > 0)].copy()
print(data.describe())

# Feature engineering: treat invoice numbers and stock codes as string identifiers
data['InvoiceNo'] = data['InvoiceNo'].astype('str')
data['StockCode'] = data['StockCode'].astype('str')

# Standardize the numeric columns (zero mean, unit variance).
# NOTE: after this step Quantity is no longer a count and can be negative.
scaler = StandardScaler()
data[['Quantity', 'UnitPrice']] = scaler.fit_transform(data[['Quantity', 'UnitPrice']])

# Build the customer x product matrix of summed (scaled) quantities;
# customer/product pairs with no transactions are filled with 0.
user_product_matrix = data.pivot_table(index='CustomerID', columns='StockCode', values='Quantity', aggfunc='sum').fillna(0)
print(user_product_matrix.head())

  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER       6.0   
1    536365     71053                  WHITE METAL LANTERN       6.0   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER       8.0   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE       6.0   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.       6.0   

      InvoiceDate  UnitPrice  CustomerID         Country  
0  12/1/2010 8:26       2.55     17850.0  United Kingdom  
1  12/1/2010 8:26       3.39     17850.0  United Kingdom  
2  12/1/2010 8:26       2.75     17850.0  United Kingdom  
3  12/1/2010 8:26       3.39     17850.0  United Kingdom  
4  12/1/2010 8:26       3.39     17850.0  United Kingdom  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12462 entries, 0 to 12461
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   InvoiceNo    124

### Define a Reader and load data into Surprise format

In [None]:
# Seed shared by the train/test split and the SVD initialisation so reruns give the same numbers.
SEED = 42

# Convert the data to Surprise format.
# Quantity was standardized upstream, so its values are not confined to [0, 1].
# Surprise clips predictions to rating_scale, so the scale must span the observed range.
rating_scale = (float(data['Quantity'].min()), float(data['Quantity'].max()))
reader = Reader(rating_scale=rating_scale)
data_surprise = Dataset.load_from_df(data[['CustomerID', 'StockCode', 'Quantity']], reader)

# Split into training and test sets
trainset, testset = train_test_split(data_surprise, test_size=0.25, random_state=SEED)

# Define and fit the model
algo = SVD(n_factors=50, n_epochs=20, lr_all=0.005, reg_all=0.02, random_state=SEED)
algo.fit(trainset)

# Predict on the held-out set
predictions = algo.test(testset)

# RMSE (verbose=False: accuracy.rmse prints the value itself by default,
# which duplicated the line printed below)
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse}")

# MAE (same reasoning for verbose=False)
mae = accuracy.mae(predictions, verbose=False)
print(f"MAE: {mae}")

# 5-fold cross-validation (fold assignment is not seeded here)
results = cross_validate(algo, data_surprise, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print("Average RMSE:", results['test_rmse'].mean())
print("Average MAE:", results['test_mae'].mean())

RMSE: 0.7615
RMSE: 0.7614879269001803
MAE:  0.2137
MAE: 0.21373019168853483
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5820  0.7426  0.5055  0.4075  1.0307  0.8537  0.4229  
MAE (testset)     0.2520  0.2129  0.2008  0.2019  0.2495  0.2234  0.0227  
Fit time          0.05    0.05    0.05    0.05    0.05    0.05    0.00    
Test time         0.01    0.01    0.02    0.01    0.02    0.01    0.00    
Average RMSE: 0.853652830550933
Average MAE: 0.2234222031171746


In [None]:
!pip install flask-ngrok

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


## Deployment

In [None]:
from flask import Flask, request, jsonify, send_from_directory
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler

app = Flask(__name__)

# Load and preprocess data
def load_and_preprocess_data(path='data.csv'):
    """Read the transactions CSV and return a cleaned, standardized DataFrame.

    Steps: drop rows containing any missing value, keep only rows whose
    ``Quantity`` and ``UnitPrice`` are strictly positive, cast ``InvoiceNo``
    and ``StockCode`` to strings, and standardize ``Quantity`` and
    ``UnitPrice`` with ``StandardScaler``.

    Args:
        path: Location of the CSV file (read with ISO-8859-1 encoding).
            Defaults to ``'data.csv'``.

    Returns:
        The preprocessed ``pandas.DataFrame``.
    """
    raw = pd.read_csv(path, encoding='ISO-8859-1')
    complete = raw.dropna()
    # .copy() yields an independent frame, so the column assignments below do not
    # raise pandas' SettingWithCopyWarning.
    data = complete[(complete['Quantity'] > 0) & (complete['UnitPrice'] > 0)].copy()
    data['InvoiceNo'] = data['InvoiceNo'].astype('str')
    data['StockCode'] = data['StockCode'].astype('str')
    scaler = StandardScaler()
    # After scaling, Quantity/UnitPrice have zero mean and may be negative.
    data[['Quantity', 'UnitPrice']] = scaler.fit_transform(data[['Quantity', 'UnitPrice']])
    return data

# Load the cleaned transactions once, when the cell/module is executed.
data = load_and_preprocess_data()

# Customer x product matrix: one row per CustomerID, one column per StockCode,
# each entry the summed Quantity; pairs with no transactions become 0.
user_product_matrix = (
    data
    .pivot_table(index='CustomerID', columns='StockCode', values='Quantity', aggfunc='sum')
    .fillna(0)
)

# Train SVD model
def train_model(data, random_state=42):
    """Fit an SVD recommender on (CustomerID, StockCode, Quantity) triples.

    Args:
        data: DataFrame with ``CustomerID``, ``StockCode`` and ``Quantity``
            columns; ``Quantity`` is used as the rating.
        random_state: Seed passed to both the train/test split and the SVD
            initialisation so repeated runs produce the same model.

    Returns:
        Tuple ``(algo, testset)``: the SVD model fitted on 75% of the data and
        the held-out 25% test set.
    """
    quantity = data['Quantity']
    # Surprise clips predictions to rating_scale. The caller passes standardized
    # quantities, which are not confined to [0, 1], so derive the scale from the data.
    reader = Reader(rating_scale=(float(quantity.min()), float(quantity.max())))
    data_surprise = Dataset.load_from_df(data[['CustomerID', 'StockCode', 'Quantity']], reader)
    trainset, testset = train_test_split(data_surprise, test_size=0.25, random_state=random_state)
    algo = SVD(n_factors=50, n_epochs=20, lr_all=0.005, reg_all=0.02, random_state=random_state)
    algo.fit(trainset)
    return algo, testset

# Train once when the cell/module is executed; `algo` is the model the /recommend
# route queries, and `testset` is the held-out split returned alongside it.
algo, testset = train_model(data)

# Routes
@app.route('/recommend', methods=['GET'])
def recommend():
    """Return up to ten product codes recommended for the given customer.

    Reads the ``user_id`` query parameter. Responds with HTTP 400 and a JSON
    error object when the parameter is missing, is not an integer, or is not
    present in the user-product matrix. Otherwise scores every product whose
    matrix entry for this customer is 0 and returns the ids of the ten
    highest-estimated products as a JSON list.
    """
    raw_user_id = request.args.get('user_id')
    if raw_user_id is None:
        return jsonify({"error": "user_id parameter is required"}), 400

    try:
        customer_id = int(raw_user_id)
    except ValueError:
        return jsonify({"error": "user_id must be an integer"}), 400

    if customer_id not in user_product_matrix.index:
        return jsonify({"error": "Invalid user_id"}), 400

    # Products with a zero entry in this customer's row are the candidates.
    purchases = user_product_matrix.loc[customer_id]
    not_purchased = purchases == 0
    candidate_codes = purchases[not_purchased].index.tolist()

    scored = []
    for code in candidate_codes:
        scored.append(algo.predict(customer_id, code))

    # Highest estimated value first; keep the top ten.
    scored.sort(key=lambda prediction: prediction.est, reverse=True)
    top_codes = [prediction.iid for prediction in scored[:10]]
    return jsonify(top_codes)

@app.route('/favicon.ico')
def favicon():
    """Serve ``favicon.ico`` from the ``static`` directory."""
    icon_response = send_from_directory('static', 'favicon.ico')
    return icon_response

@app.route('/')
def home():
    """Root route: respond with a fixed plain-text greeting."""
    greeting = 'Hello, Flask!'
    return greeting

if __name__ == '__main__':
    # Debug mode is off on purpose: it enables the Werkzeug interactive debugger
    # (which lets a client run code on the server) and the auto-reloader, which
    # restarts the process ("Restarting with stat") and does not work from a
    # notebook kernel.
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
