<a href="https://colab.research.google.com/github/pxs1990/DeepLearning/blob/main/simple_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np

In [6]:
# Build a small synthetic DataFrame with the real dataset's column names so
# the rest of the notebook can be exercised without the original data file.
SEED = 42     # fixed seed so Restart & Run All regenerates the exact same rows
N_ROWS = 100  # number of synthetic rows to generate

# Seed NumPy's global RNG: every np.random.* call below draws from it, in the
# order the dictionary entries are written.
np.random.seed(SEED)

data = {
    'User_ID': np.random.randint(1000001, 1000101, size=N_ROWS),  # Sample User IDs
    'Product_ID': [f'P{i}' for i in range(N_ROWS)],  # Sample Product IDs (unique per row)
    'Gender': np.random.choice(['F', 'M'], size=N_ROWS),  # Sample Gender values
    'Age': np.random.choice(['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+'], size=N_ROWS),  # Sample Age groups
    'Occupation': np.random.randint(0, 20, size=N_ROWS),  # Sample Occupation values
    'City_Category': np.random.choice(['A', 'B', 'C'], size=N_ROWS),  # Sample City categories
    'Stay_In_Current_City_Years': np.random.choice(['0', '1', '2', '3', '4+'], size=N_ROWS),  # Sample Stay in current city years
    'Marital_Status': np.random.randint(0, 2, size=N_ROWS),  # Sample Marital Status
    'Product_Category_1': np.random.randint(1, 20, size=N_ROWS),  # Sample Product Category 1
    'Product_Category_2': np.random.randint(1, 20, size=N_ROWS),  # Sample Product Category 2
    'Product_Category_3': np.random.randint(1, 20, size=N_ROWS),  # Sample Product Category 3
    'Purchase': np.random.randint(1000, 20000, size=N_ROWS)  # Sample Purchase amounts
}
df = pd.DataFrame(data)

df.head()



Unnamed: 0,User_ID,Product_ID,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
0,1000091,P0,F,0-17,18,B,0,0,12,7,11,11425
1,1000070,P1,F,55+,16,C,3,0,12,1,19,14202
2,1000054,P2,M,51-55,13,B,3,1,2,18,6,17826
3,1000088,P3,M,36-45,2,A,2,0,17,9,7,9777
4,1000070,P4,M,55+,12,B,2,1,18,1,18,7352


In [7]:
# Separate the regression target from the predictors.
y = df['Purchase']                  # target: the purchase amount column
X = df.drop(columns=['Purchase'])   # features: every remaining column


In [8]:
# Display the dimensions of the feature matrix and the target vector.
(X.shape, y.shape)

((100, 11), (100,))

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.optimizers import Adam

# Preprocessing
# 'X' holds the feature columns and 'y' the 'Purchase' target (built in the
# cells above).

# Drop the identifier columns before encoding. 'Product_ID' holds strings such
# as 'P0' that StandardScaler cannot convert to float, and 'User_ID' is an
# arbitrary key whose numeric magnitude carries no ordinal meaning.
X_features = X.drop(columns=['User_ID', 'Product_ID'])

# Convert the string-valued categorical variables into one-hot columns.
# dtype=float keeps the encoded frame fully numeric regardless of the default
# dummy dtype of the installed pandas version.
X_encoded = pd.get_dummies(
    X_features,
    columns=['Gender', 'Age', 'City_Category', 'Stay_In_Current_City_Years'],
    dtype=float,
)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Scale features: fit on the training split only so no test-set statistics
# leak into training, then apply the same transform to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# An LSTM consumes 3-D input (samples, timesteps, features). Each row of this
# tabular data is presented as a sequence of length one.
X_train_seq = X_train_scaled[:, np.newaxis, :]
X_test_seq = X_test_scaled[:, np.newaxis, :]

# Keras receives plain float arrays for the targets.
y_train_arr = y_train.to_numpy(dtype='float32')
y_test_arr = y_test.to_numpy(dtype='float32')

# Build the RNN Model
# No Embedding layer: Embedding looks up non-negative integer indices, whereas
# the standardized features are real-valued (and often negative), so they are
# fed to the LSTM directly.
model = Sequential()
model.add(LSTM(64))
model.add(Dense(1))  # single linear unit for the regression output

# Compile the model
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

# Training
# NOTE(review): the target is left in raw purchase units so MAE stays directly
# interpretable; consider scaling y if convergence is slow.
history = model.fit(X_train_seq, y_train_arr, epochs=10, batch_size=32, validation_split=0.2)

# Evaluation
test_loss, test_mae = model.evaluate(X_test_seq, y_test_arr)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")
