In [3]:
import pandas as pd
import numpy as np
import io
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


In [4]:

# Load the NFL user-preferences CSV (path is relative to the notebook's working directory).
# `df` is the user-preferences frame passed to the recommendation functions in later cells.
df = pd.read_csv('enhanced_nfl_user_preferences.csv')
# Preview the first rows to confirm the expected columns were read.
print(df.head())

   UserID        FavPlayer               FavTeam MostLikedPlayerPost  \
0       1        Tom Brady  Tampa Bay Buccaneers           Tom Brady   
1       2      Jason Kelce   Philadelphia Eagles         Jason Kelce   
2       3  Patrick Mahomes    Kansas City Chiefs        Travis Kelce   
3       4        Tom Brady  Tampa Bay Buccaneers           Tom Brady   
4       5     Dak Prescott        Dallas Cowboys        Dak Prescott   

  MostWatchedTeamHighlights MostPurchasedMerchandise  VideoViewDuration  \
0      Tampa Bay Buccaneers         Tom Brady Jersey               1200   
1       Philadelphia Eagles               Eagles Cap                800   
2        Kansas City Chiefs           Chiefs T-shirt               1500   
3      Tampa Bay Buccaneers        Buccaneers Hoodie                900   
4            Dallas Cowboys           Cowboys Jersey               1100   

   LikesOnVideos  SharesOnVideos  CommentsOnVideos         SearchQueries  
0             30               5         

In [5]:
# Video recommendation algorithm for individual user IDs. Random jitter is added to every score, so the recommended content IDs vary between users and between runs.

In [6]:
def get_unique_recommendations_for_users(user_id, df, diversity_factor=0.2):
    """Recommend five content IDs from a built-in 20-item demo pool.

    The pool holds ten 'Tom Brady' items (IDs 1-10) and ten 'NFL General'
    items (IDs 11-20). An item's score is the user's ``LikesOnVideos`` when
    the item is in the 'Tom Brady' category and the user's ``FavPlayer`` is
    'Tom Brady'; otherwise it is ``LikesOnVideos * diversity_factor``. Uniform
    noise in ``[0, diversity_factor)`` is then added to every score.

    Args:
        user_id: Value matched against the ``UserID`` column of ``df``.
        df: User-preferences frame with ``UserID``, ``FavPlayer`` and
            ``LikesOnVideos`` columns.
        diversity_factor: Scales both the non-favourite scores and the noise.

    Returns:
        list: The five ``ContentID`` values with the highest final score,
        best first.

    Raises:
        IndexError: If no row of ``df`` has the requested ``UserID``.
    """
    profile = df.loc[df['UserID'] == user_id].iloc[0]

    favourite = profile['FavPlayer']
    likes = profile['LikesOnVideos']
    damped_likes = likes * diversity_factor

    # Only a Tom Brady fan gets the undamped score on Tom Brady content.
    if favourite == 'Tom Brady':
        brady_score = likes
    else:
        brady_score = damped_likes
    category_scores = {'Tom Brady': brady_score, 'NFL General': damped_likes}

    # Hard-coded demo catalogue: 20 items split evenly across two categories.
    categories = ['Tom Brady'] * 10 + ['NFL General'] * 10
    pool = pd.DataFrame({'ContentID': range(1, 21), 'Category': categories})

    pool['Score'] = pool['Category'].map(category_scores)
    # Jitter breaks ties and shuffles the order of equally scored items.
    pool['Randomness'] = np.random.rand(len(pool)) * diversity_factor
    pool['FinalScore'] = pool['Score'] + pool['Randomness']

    top_five = pool.sort_values(by='FinalScore', ascending=False).head(5)
    return top_five['ContentID'].tolist()

# Example: recommendations for users 1, 2, 8 and 9 (requested in that order).
(
    recommendations_henry,
    recommendations_paul,
    recommendations_lionel,
    recommendations_ciryl,
) = (get_unique_recommendations_for_users(uid, df) for uid in (1, 2, 8, 9))

# One print call, one line per user.
print(
    f"Recommended Content IDs for Henry: {recommendations_henry}",
    f"Recommended Content IDs for Paul: {recommendations_paul}",
    f"Recommended Content IDs for Lionel: {recommendations_lionel}",
    f"Recommended Content IDs for Ciryl: {recommendations_ciryl}",
    sep="\n",
)


Recommended Content IDs for Henry: [7, 1, 9, 8, 3]
Recommended Content IDs for Paul: [14, 2, 11, 4, 6]
Recommended Content IDs for Lionel: [11, 2, 19, 15, 9]
Recommended Content IDs for Ciryl: [11, 8, 13, 15, 6]


In [7]:
# Load the content catalogue (path relative to the working directory); the recommenders
# in the following cells read its 'Tags' and 'Title' columns.
content_pool_df = pd.read_csv('content_pool.csv')

In [8]:
def get_enhanced_recommendations_for_user(user_id, user_preferences_df, content_pool_df, diversity_factor=0.2):
    """Recommend up to five titles, favouring content tagged with the user's favourite player.

    Each item starts at the user's ``LikesOnVideos`` when its ``Tags`` text
    contains the user's ``FavPlayer`` (plain substring match) and at 0
    otherwise. Uniform noise in ``[0, diversity_factor * LikesOnVideos)`` is
    added so non-matching content can occasionally surface.

    The caller's ``content_pool_df`` is not modified: scores are attached to a
    copy, so repeated calls for different users do not overwrite each other's
    columns on the shared catalogue.

    Args:
        user_id: Value matched against ``user_preferences_df['UserID']``.
        user_preferences_df: Frame with ``UserID``, ``FavPlayer`` and
            ``LikesOnVideos`` columns.
        content_pool_df: Frame with ``Tags`` and ``Title`` columns.
        diversity_factor: Noise amplitude as a fraction of the user's likes.

    Returns:
        list: Titles of the (at most) five highest-scoring items, best first.

    Raises:
        IndexError: If no row has the requested ``UserID``.
    """
    matches = user_preferences_df[user_preferences_df['UserID'] == user_id]
    if matches.empty:
        # Same exception type as a bare ``.iloc[0]`` on an empty frame, with a usable message.
        raise IndexError(f"No user with UserID={user_id!r} in user_preferences_df")
    user_data = matches.iloc[0]
    likes = user_data['LikesOnVideos']

    # Literal substring match; rows with missing tags count as "no match" instead of raising.
    has_fav_player = content_pool_df['Tags'].str.contains(user_data['FavPlayer'], regex=False, na=False)
    base_score = np.where(has_fav_player, likes, 0)

    # Randomness for diversity, scaled by the user's likes.
    jitter = np.random.rand(len(content_pool_df)) * diversity_factor * likes

    # assign() returns a new frame, leaving the caller's catalogue untouched.
    scored = content_pool_df.assign(Score=base_score + jitter)
    recommendations = scored.sort_values(by='Score', ascending=False).head(5)['Title'].tolist()

    return recommendations

# Example usage:
# `df` holds the user preferences; `content_pool_df` is the catalogue loaded from 'content_pool.csv'.
(
    recommendations_user_Alex,
    recommendations_user_Siri,
    recommendations_user_Zarathustra,
    recommendations_user_Lei,
) = (get_enhanced_recommendations_for_user(uid, df, content_pool_df) for uid in (1, 2, 3, 4))

# One print call; the " " entries reproduce the spacer lines between users.
print(
    f"Recommended Videos for Alex: {recommendations_user_Alex}",
    " ",
    f"Recommended Videos for Siri: {recommendations_user_Siri}",
    " ",
    f"Recommended Videos for Zarathustra: {recommendations_user_Zarathustra}",
    " ",
    f"Recommended Videos for Lei: {recommendations_user_Lei}",
    sep="\n",
)


Recommended Videos for Alex: ['Tom Brady: A Career Retrospective', 'A Day in the Life: Jason Kelce', 'Tom Brady’s Offseason Training', 'NFL Season Predictions: Who’s Winning?', 'Brady vs Kelce: A Tale of Titans']
 
Recommended Videos for Siri: ['Biggest Upsets in NFL History', 'Jason Kelce’s Training Regime', 'Ranking the NFL’s Top Defenses', 'Jason Kelce’s Best Plays Compilation', 'Season’s Top NFL Plays']
 
Recommended Videos for Zarathustra: ['Jason Kelce’s Training Regime', 'Ranking the NFL’s Top Defenses', 'Brady vs Kelce: A Tale of Titans', 'Exclusive Interview with Tom Brady', 'Tom Brady: A Career Retrospective']
 
Recommended Videos for Lei: ['Tom Brady: A Career Retrospective', 'Jason Kelce’s Journey to the Top', 'Rising Stars: Rookie Highlights', 'Tom Brady’s MVP Season Highlights', 'Season’s Top NFL Plays']


In [9]:

def calculate_similarity_score(tags_list, user_query):
    """Return the cosine similarity of each tag string to the user's search query.

    The TF-IDF vocabulary is fitted on the tag strings and the query together
    (the query is appended as the last document), so query terms contribute
    to the vocabulary and to the IDF weights.

    Args:
        tags_list: List of tag strings, one per content item.
        user_query: The user's query string.

    Returns:
        1-D array with one similarity value per entry of ``tags_list``.
    """
    corpus = tags_list + [user_query]
    weights = TfidfVectorizer().fit_transform(corpus)

    # The last row is the query; every row before it is a content item.
    query_position = weights.shape[0] - 1
    content_rows = weights[:query_position]
    query_row = weights[query_position:]

    return cosine_similarity(content_rows, query_row).flatten()

In [130]:
# Generates personalized content recommendations for a user based on their preferences, 
# using similarity scoring, weighted scoring, and slight randomness to ensure diversity.

In [10]:

def get_advanced_recommendations_for_user(user_id, user_preferences_df, content_pool_df, n_recommendations=5, diversity_factor=0.12):
    """Recommend titles ranked by TF-IDF similarity between content tags and the user's interests.

    The user's ``FavPlayer``, ``FavTeam`` and ``MostLikedPlayerPost`` are
    joined into one query string and compared with every item's ``Tags`` via
    ``calculate_similarity_score``. Uniform noise in ``[0, diversity_factor)``
    is added before ranking.

    The caller's ``content_pool_df`` is not modified: scores live on a copy, so
    the shared catalogue does not accumulate per-user scratch columns.

    Args:
        user_id: Value matched against ``user_preferences_df['UserID']``.
        user_preferences_df: Frame with ``UserID``, ``FavPlayer``, ``FavTeam``,
            ``MostLikedPlayerPost``, ``LikesOnVideos`` and ``VideoViewDuration``.
        content_pool_df: Frame with ``Tags`` and ``Title`` columns.
        n_recommendations: Maximum number of titles to return.
        diversity_factor: Upper bound of the random jitter added to each score.

    Returns:
        list: Up to ``n_recommendations`` titles, best first.

    Raises:
        IndexError: If no row has the requested ``UserID``.
    """
    matches = user_preferences_df[user_preferences_df['UserID'] == user_id]
    if matches.empty:
        # Same exception type as a bare ``.iloc[0]`` on an empty frame, with a usable message.
        raise IndexError(f"No user with UserID={user_id!r} in user_preferences_df")
    user_data = matches.iloc[0]

    # The search query is the user's stated interests joined with spaces.
    user_search_query = " ".join([user_data['FavPlayer'], user_data['FavTeam'], user_data['MostLikedPlayerPost']])

    similarity = calculate_similarity_score(content_pool_df['Tags'].tolist(), user_search_query)

    # NOTE: the likes/duration terms are per-user constants, identical for every
    # item, so they shift all scores equally and do not influence the ranking.
    weighted = (similarity * 0.6 +
                user_data['LikesOnVideos'] * 0.2 +
                user_data['VideoViewDuration'] * 0.2)

    # Randomness for diversity.
    final_scores = weighted + np.random.rand(len(content_pool_df)) * diversity_factor

    # assign() returns a new frame, leaving the caller's catalogue untouched.
    ranked = content_pool_df.assign(FinalScore=final_scores).sort_values(by='FinalScore', ascending=False)
    recommendations = ranked.head(n_recommendations)['Title'].tolist()

    return recommendations

# Example usage with a larger dataset and enhanced recommendations (users 1, 2, 5 and 8).
recommendations_Roger = get_advanced_recommendations_for_user(1, df, content_pool_df)
recommendations_Novak = get_advanced_recommendations_for_user(2, df, content_pool_df)
recommendations_Rafael = get_advanced_recommendations_for_user(5, df, content_pool_df)
recommendations_Cris = get_advanced_recommendations_for_user(8, df, content_pool_df)


# Each label matches the variable being printed (Rafael/Cris were previously
# printed under the Roger/Novak labels).
print(f"Recommended Videos for Roger: {recommendations_Roger}")
print(" ")
print(f"Recommended Videos for Novak: {recommendations_Novak}")
print(" ")
print(f"Recommended Videos for Rafael: {recommendations_Rafael}")
print(" ")
print(f"Recommended Videos for Cris: {recommendations_Cris}")


Recommended Videos for Roger: ['Tom Brady: A Career Retrospective', 'Tom Brady’s Offseason Training', 'Tom Brady’s MVP Season Highlights', 'Brady vs Kelce: A Tale of Titans', 'Exclusive Interview with Tom Brady']
 
Recommended Videos for Novak: ['Jason Kelce’s Journey to the Top', 'Jason Kelce’s Best Plays Compilation', 'Jason Kelce’s Training Regime', 'Brady vs Kelce: A Tale of Titans', 'A Day in the Life: Jason Kelce']
 
Recommended Videos for Roger: ['This Week in NFL: Top Highlights', 'Tom Brady: A Career Retrospective', 'Tom Brady’s MVP Season Highlights', 'A Day in the Life: Jason Kelce', 'Brady vs Kelce: A Tale of Titans']
 
Recommended Videos for Novak: ['Jason Kelce’s Training Regime', 'Jason Kelce’s Best Plays Compilation', 'A Day in the Life: Jason Kelce', 'Jason Kelce’s Journey to the Top', 'Brady vs Kelce: A Tale of Titans']


In [11]:
# Load the larger video catalogue; the path is relative to the notebook's working directory.
larger_nfl_video_content_df = pd.read_csv('nfl_video_rec_pool.csv')

# Dummy user preferences, one list per column (position i in every list describes the same user).
# NOTE: there is no 'FavTeam' column here, so this frame cannot be passed to
# get_advanced_recommendations_for_user, which reads user_data['FavTeam'].
user_pref_data = {
    'UserID': [1, 2, 3, 4],
    'FavPlayer': ['Patrick Mahomes', 'Aaron Rodgers', 'Derrick Henry', 'NFL General'],
    'LikesOnVideos': [25, 15, 30, 10],
    'VideoViewDuration': [300, 200, 500, 100],  # Total duration in seconds
    'SearchQueries': ['Mahomes game winner', 'Rodgers training', 'Henry highlights', 'NFL draft'],
    'MostLikedPlayerPost': ['Mahomes touchdown', 'Rodgers comeback', 'Henry 2000 yards', 'NFL MVP race']  # Joined with 'FavPlayer' to build the search query in the larger-dataset recommender
}

user_preferences_df = pd.DataFrame(user_pref_data)


In [12]:
def calculate_similarity_scores(content_tags, user_query):
    """Return the cosine similarity of each content tag string to a user query.

    The TF-IDF vocabulary is fitted on ``content_tags`` only; the query is
    then projected into that fitted space, so query terms absent from the
    tags do not enter the vocabulary.

    Args:
        content_tags: Iterable of tag strings, one per content item.
        user_query: The user's query string.

    Returns:
        1-D array with one similarity value per entry of ``content_tags``.
    """
    tfidf = TfidfVectorizer()
    tag_vectors = tfidf.fit_transform(content_tags)
    query_vector = tfidf.transform([user_query])

    # cosine_similarity yields an (n_items, 1) matrix; flatten it to 1-D.
    return cosine_similarity(tag_vectors, query_vector).flatten()

In [13]:
def get_advanced_recommendations_with_weighted_similarity_scores_in_larger_datasets(user_id, user_preferences_df, larger_nfl_video_content_df, n_recommendations=5, diversity_factor=0.22):
    """Recommend titles from the larger catalogue, ranked by tag/query TF-IDF similarity.

    The user's ``FavPlayer`` and ``MostLikedPlayerPost`` are joined into one
    query string and compared with every item's ``Tags`` via
    ``calculate_similarity_scores``. Uniform noise in ``[0, diversity_factor)``
    is added before ranking.

    The caller's ``larger_nfl_video_content_df`` is not modified: scores live
    on a copy, so the shared catalogue does not accumulate per-user scratch
    columns.

    Args:
        user_id: Value matched against ``user_preferences_df['UserID']``.
        user_preferences_df: Frame with ``UserID``, ``FavPlayer``,
            ``MostLikedPlayerPost``, ``LikesOnVideos`` and ``VideoViewDuration``.
        larger_nfl_video_content_df: Frame with ``Tags`` and ``Title`` columns.
        n_recommendations: Maximum number of titles to return.
        diversity_factor: Upper bound of the random jitter added to each score.

    Returns:
        list: Up to ``n_recommendations`` titles, best first.

    Raises:
        IndexError: If no row has the requested ``UserID``.
    """
    # Retrieve user-specific data
    matches = user_preferences_df[user_preferences_df['UserID'] == user_id]
    if matches.empty:
        # Same exception type as a bare ``.iloc[0]`` on an empty frame, with a usable message.
        raise IndexError(f"No user with UserID={user_id!r} in user_preferences_df")
    user_data = matches.iloc[0]

    # Concatenate user preferences into a search query
    user_search_query = " ".join([user_data['FavPlayer'], user_data['MostLikedPlayerPost']])

    # Similarity of every item's tags to the user's interests
    similarity_scores = calculate_similarity_scores(larger_nfl_video_content_df['Tags'], user_search_query)

    # NOTE: the likes/duration terms are per-user constants, identical for every
    # item, so they shift all scores equally and do not influence the ranking.
    weighted_scores = (similarity_scores * 0.6 +
                       user_data['LikesOnVideos'] * 0.2 +
                       user_data['VideoViewDuration'] * 0.2)

    # Introduce randomness for diversity
    final_scores = weighted_scores + np.random.rand(len(larger_nfl_video_content_df)) * diversity_factor

    # assign() returns a new frame, leaving the caller's catalogue untouched.
    ranked = larger_nfl_video_content_df.assign(FinalScore=final_scores).sort_values(by='FinalScore', ascending=False)
    recommendations = ranked.head(n_recommendations)

    return recommendations['Title'].tolist()


In [14]:
# Recommendations for dummy users 1-3, using `user_preferences_df` and the larger catalogue.
(
    recommendations_for_user_1,
    recommendations_for_user_2,
    recommendations_for_user_3,
) = (
    get_advanced_recommendations_with_weighted_similarity_scores_in_larger_datasets(
        user_id=uid,
        user_preferences_df=user_preferences_df,
        larger_nfl_video_content_df=larger_nfl_video_content_df,
    )
    for uid in (1, 2, 3)
)

# One print call; the " " entries reproduce the spacer lines between users.
print(
    f"Recommended Videos: {recommendations_for_user_1}",
    " ",
    f"Recommended Videos: {recommendations_for_user_2}",
    " ",
    f"Recommended Videos: {recommendations_for_user_3}",
    sep="\n",
)


Recommended Videos: ['Patrick Mahomes: Rookie Watchlist 12', 'Derrick Henry: Draft Surprises 31', 'Derrick Henry: Strength Training Routine 141', 'Aaron Rodgers: Top NFL Moments 91', 'Patrick Mahomes: Unbelievable Plays 1']
 
Recommended Videos: ['Derrick Henry: Escaping Pressure 123', 'Derrick Henry: Analyzing Running Technique 55', 'Derrick Henry: The Comeback King 152', 'Aaron Rodgers: Epic Battles 180', 'Patrick Mahomes: Epic Battles 145']
 
Recommended Videos: ['Derrick Henry: Road to 2,000 Yards 185', 'Patrick Mahomes: Historic Match-ups 111', 'Aaron Rodgers: Pre-Game Rituals 135', 'Derrick Henry: NFL Strategy Evolution 151', 'Patrick Mahomes: High School Highlights 119']
