In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
import pandas as pd
import ast  # Use for safely evaluating Python dictionary strings

# Load data
df = pd.read_csv("user_data.csv")

# Function to safely parse dictionary-like strings
def safe_dict_parse(x):
    if isinstance(x, str):  
        try:
            return ast.literal_eval(x)  # Convert string to Python dictionary
        except (SyntaxError, ValueError):
            print(f"Skipping invalid JSON row: {x}")
            return {}  # Return empty dictionary if parsing fails
    return x  

# Convert 'posts' column safely
df["posts"] = df["posts"].apply(safe_dict_parse)

# Extract relevant fields
if not df["posts"].isnull().all():  # Check if there is valid data
    posts_df = pd.json_normalize(df["posts"], errors='ignore')
else:
    posts_df = pd.DataFrame()  # Empty DataFrame if no valid JSON

# Debugging: Print extracted column names
print("Extracted columns from 'posts':", posts_df.columns)

# Merge extracted data
df_expanded = df.drop(columns=["posts"]).join(posts_df)

# Debugging: Print available columns
print("Available columns in df_expanded:", df_expanded.columns)

# Check if required columns exist
required_columns = ["category.id", "id", "view_count"]  # Adjust based on actual columns
missing_columns = [col for col in required_columns if col not in df_expanded.columns]

if missing_columns:
    raise KeyError(f"Missing columns: {missing_columns}. Check dataset structure!")

# Fill NaN values
df_expanded.fillna(0, inplace=True)

# Define features and labels
X = df_expanded[["category.id", "id"]].values
y = df_expanded["view_count"].values  # Change label column if needed

# Define deep learning model
model = Sequential([
    Embedding(input_dim=10000, output_dim=32, input_length=X.shape[1]),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid")
])

# Compile model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train model
model.fit(X, y, epochs=10, batch_size=32)

# Save model
model.save("recommendation_model.h5")
print("Model training complete!")


Extracted columns from 'posts': Index(['id', 'topic', 'slug', 'title', 'identifier', 'comment_count',
       'upvote_count', 'view_count', 'exit_count', 'rating_count',
       'average_rating', 'share_count', 'bookmark_count', 'video_link',
       'contract_address', 'chain_id', 'chart_url', 'is_locked', 'created_at',
       'first_name', 'last_name', 'username', 'user_type', 'has_evm_wallet',
       'has_solana_wallet', 'upvoted', 'bookmarked',
       'is_available_in_public_feed', 'thumbnail_url', 'gif_thumbnail_url',
       'following', 'picture_url', 'post_summary', 'tags', 'source_matrix',
       'category.id', 'category.name', 'category.count',
       'category.description', 'category.image_url', 'baseToken.address',
       'baseToken.name', 'baseToken.symbol', 'baseToken.image_url', 'topic.id',
       'topic.name', 'topic.description', 'topic.image_url', 'topic.slug',
       'topic.is_public', 'topic.project_code', 'topic.posts_count',
       'topic.language', 'topic.created_at'



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.0106 - loss: -11.6756
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0122 - loss: -76.6427
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0110 - loss: -257.1953
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0032 - loss: -760.8701    
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0075 - loss: -1925.6217   
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0127 - loss: -4503.2188   
Epoch 7/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0060 - loss: -9546.4014   
Epoch 8/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0082 - loss: -18008.2012    
Epoch 9/10
[1m12/12[0



Model training complete!


In [1]:
from tensorflow.keras.models import load_model

model = load_model("D:/VideoRecom/video-recommendation-assignment/scripts/recommendation_model.h5")

print(model.summary())  # Check the input layer shape




None
