### Loading Dataset

In [2]:
import pandas as pd
import numpy as np

# Read the raw catalogue from disk, decoding the file as Latin-1,
# then preview the first rows.
df = pd.read_csv(
    "movies.csv",
    encoding="latin1",
)
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,...,,,,,,,,,,
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,...,,,,,,,,,,
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,...,,,,,,,,,,
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,...,,,,,,,,,,
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,...,,,,,,,,,,


In [3]:
# List every column name of the freshly loaded frame.
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description',
       'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15',
       'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19',
       'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23',
       'Unnamed: 24', 'Unnamed: 25'],
      dtype='object')

In [4]:
# Drop the placeholder "Unnamed: N" columns that pandas created when reading the CSV.
# The names are derived from the frame itself rather than hard-coded, so the cell
# keeps working if the file has a different number of such columns, and re-running
# it is a no-op (an empty list is dropped) instead of raising KeyError.
columns = [name for name in df.columns if str(name).startswith("Unnamed:")]
df = df.drop(columns=columns)
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

### Data cleaning & Basic EDA

In [5]:
# Keep only fully populated rows: any row with a missing value in any column
# is discarded, then the index is renumbered from 0.
df = df.dropna(axis=0, how="any").reset_index(drop=True)
df.shape[0]

5334

In [6]:
# Summarise column dtypes and non-null counts after cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5334 entries, 0 to 5333
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       5334 non-null   object
 1   type          5334 non-null   object
 2   title         5334 non-null   object
 3   director      5334 non-null   object
 4   cast          5334 non-null   object
 5   country       5334 non-null   object
 6   date_added    5334 non-null   object
 7   release_year  5334 non-null   int64 
 8   rating        5334 non-null   object
 9   duration      5334 non-null   object
 10  listed_in     5334 non-null   object
 11  description   5334 non-null   object
dtypes: int64(1), object(11)
memory usage: 500.2+ KB


In [7]:
# Count rows that repeat an earlier row across all columns.
df.duplicated().sum()

0

In [8]:
# Distinct values present in the 'type' column.
df['type'].unique()

array(['Movie', 'TV Show'], dtype=object)

In [9]:
# Distinct values present in the 'rating' column.
df['rating'].unique()

array(['TV-MA', 'TV-14', 'PG-13', 'PG', 'R', 'TV-PG', 'G', 'TV-Y7',
       'TV-G', 'TV-Y', 'NC-17', 'NR', 'TV-Y7-FV', 'UR', 'A'], dtype=object)

In [10]:
# Collect every distinct genre label found in 'listed_in'.
# Each cell holds a ", "-separated list of labels; the labels are flattened and
# de-duplicated in first-seen order. dict.fromkeys keeps insertion order and
# de-duplicates in a single pass, replacing the repeated `not in list` scan
# and the temp/temp2/temp3 scratch variables.
category = list(
    dict.fromkeys(
        label
        for listing in df['listed_in'].unique()
        for label in listing.split(", ")
    )
)
print(category)

['Dramas', 'Independent Movies', 'International Movies', 'British TV Shows', 'Reality TV', 'Comedies', 'Romantic Movies', 'Horror Movies', 'Sci-Fi & Fantasy', 'Thrillers', 'Action & Adventure', 'Classic Movies', 'Anime Features', 'Children & Family Movies', 'Documentaries', 'Music & Musicals', 'Cult Movies', 'Faith & Spirituality', 'LGBTQ Movies', 'Crime TV Shows', 'International TV Shows', 'TV Dramas', 'Romantic TV Shows', 'Spanish-Language TV Shows', 'Stand-Up Comedy', 'TV Shows', 'Movies', 'TV Comedies', 'Sports Movies', 'TV Action & Adventure', 'TV Sci-Fi & Fantasy', 'TV Mysteries', 'Docuseries', 'Anime Series', 'TV Horror', 'Teen TV Shows', 'Korean TV Shows', 'Stand-Up Comedy & Talk Shows', "Kids' TV", 'Classic & Cult TV', 'TV Thrillers', 'Science & Nature TV', 'Sci-fi', 'Horror', 'Action', 'Drama', 'Romance', 'Thriller']


In [11]:
# Number of distinct genre labels held in `category`.
len(category)

48

In [12]:
# Print how many titles fall under each value of 'type' (Movies vs TV Shows)
# and of 'rating', one labelled frequency table per column.
for heading, column_name in (
    ("Distribution of types:", 'type'),
    ("\nRatings distribution:", 'rating'),
):
    print(heading, df[column_name].value_counts())

Distribution of types: type
Movie      5186
TV Show     148
Name: count, dtype: int64

Ratings distribution: rating
TV-MA       1823
TV-14       1214
R            778
PG-13        470
TV-PG        431
PG           275
TV-G          84
TV-Y7         76
TV-Y          76
NR            58
G             40
TV-Y7-FV       3
UR             3
NC-17          2
A              1
Name: count, dtype: int64


### Structuring the Multi-Armed Bandit

In [25]:
# Epsilon-greedy bandit state: one arm per distinct show_id.
epsilon = 0.1  # probability of exploring instead of exploiting

# show_id -> arm index, numbered in order of first appearance in the frame
unique_show_ids = df['show_id'].unique()
show_map = dict(zip(unique_show_ids, range(len(unique_show_ids))))

n_arms = len(show_map)  # number of arms (shows)
arms_rewards = np.zeros(n_arms)  # cumulative reward collected by each arm
arms_counts = np.zeros(n_arms)  # number of times each arm was pulled

In [26]:
import random

def recommend_show(user_id):
    """Pick a show for one user with an epsilon-greedy policy.

    With probability ``epsilon`` a show is drawn uniformly at random
    (exploration); otherwise the arm with the highest average reward so far is
    chosen (exploitation). Both branches select from ``show_map`` so the
    returned arm index always matches the ``arms_rewards`` / ``arms_counts``
    arrays, even if ``df`` was modified after the bandit state was built.

    Args:
        user_id: Identifier used only in the printed message.

    Returns:
        A tuple ``(show, arm)`` where ``show`` is the first row of ``df`` whose
        ``show_id`` matches the selected show and ``arm`` is its index in
        ``show_map``.
    """
    # Keys of show_map in insertion order, i.e. position == arm index.
    show_ids = list(show_map)

    if random.uniform(0, 1) < epsilon:
        mode = "Exploration"
        show_id = random.choice(show_ids)
    else:
        mode = "Exploitation"
        # The small constant avoids division by zero for arms never pulled,
        # whose estimated average reward is therefore 0.
        best_arm = int(np.argmax(arms_rewards / (arms_counts + 1e-10)))
        show_id = show_ids[best_arm]

    show = df[df['show_id'] == show_id].iloc[0]
    print(f"User {user_id} ({mode}): Recommending show '{show['title']}'")

    return show, show_map[show_id]

In [27]:
def simulate_user_interaction(user_id, show, arm):
    """Simulate one user's reaction to a recommended show and record it.

    The reward is drawn uniformly from {0, 1} (1 = liked, 0 = disliked); it
    does not depend on the show's genre, rating or any other attribute.
    The draw is added to ``arms_rewards[arm]`` and ``arms_counts[arm]`` is
    incremented by one.

    Args:
        user_id: Identifier used only in the printed message.
        show: Row-like object; only ``show['title']`` is read, for printing.
        arm: Index of the arm whose statistics are updated.

    Returns:
        The sampled reward (0 or 1).
    """
    reward = np.random.choice([0, 1])
    verdict = 'liked' if reward == 1 else 'disliked'

    # Record the outcome against the selected arm.
    arms_counts[arm] += 1
    arms_rewards[arm] += reward

    print(f"User {user_id} watched '{show['title']}' and {verdict} it.")

    return reward

### Simulating Recommendations

In [28]:
# Run the recommend -> feedback cycle once per simulated user.
n_simulations = 100  # number of simulated users / interactions
recommendation_history = []  # (user_id, title, reward, pulls of that arm so far)

for user_id in range(1, n_simulations + 1):
    show, arm = recommend_show(user_id)
    reward = simulate_user_interaction(user_id, show, arm)
    # arms_counts[arm] is read after the update, so it includes this pull.
    record = (user_id, show['title'], reward, arms_counts[arm])
    recommendation_history.append(record)

User 1 (Exploration): Recommending show 'A Private War'
User 1 watched 'A Private War' and liked it.
User 2 (Exploitation): Recommending show 'A Private War'
User 2 watched 'A Private War' and disliked it.
User 3 (Exploration): Recommending show 'The Daughter'
User 3 watched 'The Daughter' and liked it.
User 4 (Exploitation): Recommending show 'The Daughter'
User 4 watched 'The Daughter' and liked it.
User 5 (Exploitation): Recommending show 'The Daughter'
User 5 watched 'The Daughter' and liked it.
User 6 (Exploitation): Recommending show 'The Daughter'
User 6 watched 'The Daughter' and liked it.
User 7 (Exploitation): Recommending show 'The Daughter'
User 7 watched 'The Daughter' and disliked it.
User 8 (Exploitation): Recommending show 'The Daughter'
User 8 watched 'The Daughter' and disliked it.
User 9 (Exploitation): Recommending show 'The Daughter'
User 9 watched 'The Daughter' and liked it.
User 10 (Exploitation): Recommending show 'The Daughter'
User 10 watched 'The Daughter' a

### Summary of Results

In [29]:
# Report total reward and pull count for every arm that was pulled at least once.
print("\nFinal Results:")
ordered_show_ids = list(show_map.keys())  # position == arm index
for arm_index, (total_reward, pulls) in enumerate(zip(arms_rewards, arms_counts)):
    if not pulls > 0:
        continue  # arm never recommended
    matching_rows = df[df['show_id'] == ordered_show_ids[arm_index]]
    show_title = matching_rows.iloc[0]['title']
    print(f"Show '{show_title}': Total reward: {total_reward}, Pulled {pulls} times")


Final Results:
Show 'A Private War': Total reward: 11.0, Pulled 23.0 times
Show 'Tony Manero': Total reward: 22.0, Pulled 40.0 times
Show 'Miss Americana': Total reward: 0.0, Pulled 1.0 times
Show 'Why Me?': Total reward: 2.0, Pulled 2.0 times
Show 'Uyare': Total reward: 2.0, Pulled 4.0 times
Show 'Hantu Kak Limah': Total reward: 1.0, Pulled 2.0 times
Show 'Heroine': Total reward: 1.0, Pulled 2.0 times
Show 'The Polka King': Total reward: 0.0, Pulled 1.0 times
Show 'Haraamkhor': Total reward: 0.0, Pulled 1.0 times
Show 'Bean: The Ultimate Disaster Movie': Total reward: 1.0, Pulled 2.0 times
Show 'Casino Royale': Total reward: 0.0, Pulled 1.0 times
Show 'The Daughter': Total reward: 9.0, Pulled 19.0 times
Show 'The Spy Next Door': Total reward: 1.0, Pulled 2.0 times
