In [1]:
import pandas as pd
import requests
from PIL import Image
import numpy as np
import io
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def load_dataset(dataset_path):
    """Read the CSV at ``dataset_path`` and return it as a pandas DataFrame.

    Args:
        dataset_path: Anything ``pd.read_csv`` accepts as its first argument
            (a path string or a file-like object).

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(dataset_path)

In [3]:
def download_image(url, timeout=30):
    """Fetch an image over HTTP and open it with PIL.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection, which would freeze a loop over many posters.

    Returns:
        The ``PIL.Image.Image`` opened from the response body.

    Raises:
        requests.exceptions.RequestException: On connection problems, on
            timeout, or (via ``raise_for_status``) on a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting PIL choke on an
    # HTML error page returned with a non-2xx status.
    response.raise_for_status()
    img = Image.open(io.BytesIO(response.content))
    return img

In [4]:
def preprocess_image(img):
    """Turn a PIL image into a single-image batch ready for ResNet50.

    Args:
        img: A ``PIL.Image.Image`` in any mode.

    Returns:
        The array returned by ``preprocess_input`` for the 224x224 RGB
        version of ``img``, with a leading batch axis of length 1 added.
    """
    # Posters may be grayscale, palette-based, RGBA or CMYK; the network
    # needs exactly three channels, so normalise the mode before resizing.
    img = img.convert('RGB').resize((224, 224))
    img_array = np.array(img)
    img_array = preprocess_input(img_array)
    # The model predicts on batches, so wrap the single image in one.
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

In [5]:
# Shared ResNet50 instance, created lazily by _get_feature_model().
_FEATURE_MODEL = None


def _get_feature_model():
    """Return the shared ResNet50 feature extractor, building it on first use."""
    global _FEATURE_MODEL
    if _FEATURE_MODEL is None:
        _FEATURE_MODEL = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    return _FEATURE_MODEL


def extract_features(img_array):
    """Run a preprocessed image batch through ResNet50 and return its features.

    The network (ImageNet weights, no classification head, global average
    pooling) is built once and reused; constructing it on every call reloads
    the weights each time, which dominates the runtime when this function is
    called once per poster.

    Args:
        img_array: Batch of preprocessed images as produced by
            ``preprocess_image``.

    Returns:
        The output of ``model.predict(img_array)``.
    """
    features = _get_feature_model().predict(img_array)
    return features

In [6]:
def calculate_similarity(features1, features2):
    """Return the cosine similarity between two feature arrays.

    Each input is flattened into a single row vector before the comparison,
    and the lone entry of the resulting 1x1 similarity matrix is returned.
    """
    row_a = features1.reshape(1, -1)
    row_b = features2.reshape(1, -1)
    score_matrix = cosine_similarity(row_a, row_b)
    return score_matrix[0, 0]

In [27]:
# Print the column names, non-null counts and dtypes of the loaded frame.
# NOTE(review): `dataset` is only assigned in a later cell
# (`dataset = load_dataset(dataset_path)`), so on a fresh top-to-bottom run
# this cell raises NameError unless it is moved below that assignment.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Poster_Link    1000 non-null   object 
 1   Series_Title   1000 non-null   object 
 2   Released_Year  1000 non-null   object 
 3   Certificate    899 non-null    object 
 4   Runtime        1000 non-null   object 
 5   Genre          1000 non-null   object 
 6   IMDB_Rating    1000 non-null   float64
 7   Overview       1000 non-null   object 
 8   Meta_score     843 non-null    float64
 9   Director       1000 non-null   object 
 10  Star1          1000 non-null   object 
 11  Star2          1000 non-null   object 
 12  Star3          1000 non-null   object 
 13  Star4          1000 non-null   object 
 14  No_of_Votes    1000 non-null   int64  
 15  Gross          831 non-null    object 
dtypes: float64(2), int64(1), object(13)
memory usage: 125.1+ KB


In [22]:
def find_top_similar_series_titles(user_selected_poster_url, dataset, top_n=3):
    """Rank the posters in ``dataset`` by visual similarity to a chosen poster.

    Every poster is downloaded, preprocessed and embedded with
    ``extract_features``, then compared to the embedding of the user-selected
    poster using ``calculate_similarity``.

    Args:
        user_selected_poster_url: URL of the reference poster.
        dataset: DataFrame with ``Poster_Link`` and ``Series_Title`` columns.
        top_n: How many titles to return (defaults to 3).

    Returns:
        Up to ``top_n`` distinct titles, most similar first. Titles with equal
        scores keep the order in which they first appear in ``dataset``.
    """
    # Embed the reference poster once.
    user_selected_features = extract_features(
        preprocess_image(download_image(user_selected_poster_url))
    )

    # Several rows can share a title. Keep the best score per title rather
    # than letting whichever row comes last overwrite the earlier ones.
    best_score_by_title = {}
    for poster_url, series_title in zip(dataset['Poster_Link'], dataset['Series_Title']):
        features = extract_features(preprocess_image(download_image(poster_url)))
        similarity = calculate_similarity(user_selected_features, features)
        previous = best_score_by_title.get(series_title)
        if previous is None or similarity > previous:
            best_score_by_title[series_title] = similarity

    # sorted() is stable, so ties stay in first-seen order.
    ranked = sorted(best_score_by_title.items(), key=lambda item: item[1], reverse=True)
    top_similar_series_titles = [title for title, _ in ranked[:top_n]]
    return top_similar_series_titles

In [28]:
def recommend_movies(top_similar_series_titles, dataset):
    """Collect the best-rated entries for each of the given titles.

    For every title, the matching rows of ``dataset`` are sorted by
    ``IMDB_Rating`` (highest first) and the top half is kept. The half is
    rounded up so a title with a single matching row is still recommended.

    Args:
        top_similar_series_titles: Iterable of titles to look up.
        dataset: DataFrame with ``Series_Title`` and ``IMDB_Rating`` columns.

    Returns:
        A flat list of ``Series_Title`` values, grouped in the order of
        ``top_similar_series_titles``. Titles with no matching row contribute
        nothing.
    """
    recommended_movies = []
    for series_title in top_similar_series_titles:
        # Filter movies associated with the series
        series_movies = dataset[dataset['Series_Title'] == series_title]
        # Sort movies by rating
        series_movies_sorted = series_movies.sort_values(by='IMDB_Rating', ascending=False)
        # Select top half of the movies (ceil, so one match still yields one)
        half_count = (len(series_movies_sorted) + 1) // 2
        top_half_movies = series_movies_sorted.head(half_count)
        # Add selected movies to recommendations
        recommended_movies.extend(top_half_movies['Series_Title'].tolist())
    return recommended_movies

In [9]:
# Location of the CSV that load_dataset() reads.
# NOTE(review): this is an absolute path on a local G: drive, so it only
# resolves on the machine it was written on — point it at your own copy of
# the file before running.
dataset_path = r"G:\CHADUVUKORA_YEDAVA\imdb\imdb_top_1000.csv"

In [10]:
# Load the CSV at dataset_path into the DataFrame used by the cells that
# reference `dataset`.
dataset = load_dataset(dataset_path)

In [31]:
# Poster URL of the row labelled 100; used as the query poster.
x = dataset.loc[100, 'Poster_Link']

In [32]:
# Ten-row candidate subset (positions 10 through 19) to keep the poster
# downloads small.
df1 = dataset.iloc[10:20]
df1

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
10,https://m.media-amazon.com/images/M/MV5BN2EyZj...,The Lord of the Rings: The Fellowship of the Ring,2001,U,178 min,"Action, Adventure, Drama",8.8,A meek Hobbit from the Shire and eight compani...,92.0,Peter Jackson,Elijah Wood,Ian McKellen,Orlando Bloom,Sean Bean,1661481,315544750.0
11,https://m.media-amazon.com/images/M/MV5BNWIwOD...,Forrest Gump,1994,UA,142 min,"Drama, Romance",8.8,"The presidencies of Kennedy and Johnson, the e...",82.0,Robert Zemeckis,Tom Hanks,Robin Wright,Gary Sinise,Sally Field,1809221,330252182.0
12,https://m.media-amazon.com/images/M/MV5BOTQ5ND...,"Il buono, il brutto, il cattivo",1966,A,161 min,Western,8.8,A bounty hunting scam joins two men in an unea...,90.0,Sergio Leone,Clint Eastwood,Eli Wallach,Lee Van Cleef,Aldo Giuffrè,688390,6100000.0
13,https://m.media-amazon.com/images/M/MV5BZGMxZT...,The Lord of the Rings: The Two Towers,2002,UA,179 min,"Action, Adventure, Drama",8.7,While Frodo and Sam edge closer to Mordor with...,87.0,Peter Jackson,Elijah Wood,Ian McKellen,Viggo Mortensen,Orlando Bloom,1485555,342551365.0
14,https://m.media-amazon.com/images/M/MV5BNzQzOT...,The Matrix,1999,A,136 min,"Action, Sci-Fi",8.7,When a beautiful stranger leads computer hacke...,73.0,Lana Wachowski,Lilly Wachowski,Keanu Reeves,Laurence Fishburne,Carrie-Anne Moss,1676426,171479930.0
15,https://m.media-amazon.com/images/M/MV5BY2NkZj...,Goodfellas,1990,A,146 min,"Biography, Crime, Drama",8.7,The story of Henry Hill and his life in the mo...,90.0,Martin Scorsese,Robert De Niro,Ray Liotta,Joe Pesci,Lorraine Bracco,1020727,46836394.0
16,https://m.media-amazon.com/images/M/MV5BYmU1ND...,Star Wars: Episode V - The Empire Strikes Back,1980,UA,124 min,"Action, Adventure, Fantasy",8.7,After the Rebels are brutally overpowered by t...,82.0,Irvin Kershner,Mark Hamill,Harrison Ford,Carrie Fisher,Billy Dee Williams,1159315,290475067.0
17,https://m.media-amazon.com/images/M/MV5BZjA0OW...,One Flew Over the Cuckoo's Nest,1975,A,133 min,Drama,8.7,A criminal pleads insanity and is admitted to ...,83.0,Milos Forman,Jack Nicholson,Louise Fletcher,Michael Berryman,Peter Brocco,918088,112000000.0
18,https://m.media-amazon.com/images/M/MV5BNjViNW...,Hamilton,2020,PG-13,160 min,"Biography, Drama, History",8.6,The real life of one of America's foremost fou...,90.0,Thomas Kail,Lin-Manuel Miranda,Phillipa Soo,Leslie Odom Jr.,Renée Elise Goldsberry,55291,
19,https://m.media-amazon.com/images/M/MV5BYWZjMj...,Gisaengchung,2019,A,132 min,"Comedy, Drama, Thriller",8.6,Greed and class discrimination threaten the ne...,96.0,Bong Joon Ho,Kang-ho Song,Lee Sun-kyun,Cho Yeo-jeong,Choi Woo-sik,552778,53367844.0


In [35]:
# Rank the posters in df1 against the query poster URL in x. This downloads
# and embeds every poster in df1, so it needs network access.
top_similar_series_titles = find_top_similar_series_titles(x, df1)



In [36]:
# Look the similar titles up in the full dataset and show the result.
recommended_movies = recommend_movies(top_similar_series_titles, dataset)
print("Recommended movies:", recommended_movies, sep="\n")

Recommended movies:
['Star Wars: Episode V - The Empire Strikes Back', 'Il buono, il brutto, il cattivo', 'Goodfellas']


In [25]:
# Display the ranked titles.
# NOTE(review): the output saved under this cell carries an older execution
# count than the cell that assigns top_similar_series_titles, and its titles
# do not match the recommendations printed above — it looks stale; re-run.
top_similar_series_titles

['The Shawshank Redemption',
 'The Lord of the Rings: The Return of the King',
 'The Dark Knight']

In [30]:
# Display the candidate subset.
# NOTE(review): the output saved under this cell lists rows 0-9, whereas the
# cell that assigns df1 slices rows 10-20 — this output appears to come from
# an earlier definition of df1; re-run to refresh it.
df1

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000
5,https://m.media-amazon.com/images/M/MV5BNzA5ZD...,The Lord of the Rings: The Return of the King,2003,U,201 min,"Action, Adventure, Drama",8.9,Gandalf and Aragorn lead the World of Men agai...,94.0,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1642758,377845905
6,https://m.media-amazon.com/images/M/MV5BNGNhMD...,Pulp Fiction,1994,A,154 min,"Crime, Drama",8.9,"The lives of two mob hitmen, a boxer, a gangst...",94.0,Quentin Tarantino,John Travolta,Uma Thurman,Samuel L. Jackson,Bruce Willis,1826188,107928762
7,https://m.media-amazon.com/images/M/MV5BNDE4OT...,Schindler's List,1993,A,195 min,"Biography, Drama, History",8.9,"In German-occupied Poland during World War II,...",94.0,Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1213505,96898818
8,https://m.media-amazon.com/images/M/MV5BMjAxMz...,Inception,2010,UA,148 min,"Action, Adventure, Sci-Fi",8.8,A thief who steals corporate secrets through t...,74.0,Christopher Nolan,Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot Page,Ken Watanabe,2067042,292576195
9,https://m.media-amazon.com/images/M/MV5BMmEzNT...,Fight Club,1999,A,139 min,Drama,8.8,An insomniac office worker and a devil-may-car...,66.0,David Fincher,Brad Pitt,Edward Norton,Meat Loaf,Zach Grenier,1854740,37030102
