In [1]:
import csv

import pandas as pd
# Load the Turkish drama catalogue; point this at your own CSV (or assign an
# existing DataFrame to df_turkish_drama instead).
df_turkish_drama = pd.read_csv('turkish_drama_data.csv')

# Every non-missing original title, coerced to a plain Python string.
original_titles = df_turkish_drama['Original Title'].dropna()
turkish_names = [str(title) for title in original_titles]

In [3]:
# Lower-cased lookup set so the title comparison below is case-insensitive and
# each membership test is a hash lookup. Blank names are left out: they would
# otherwise match every empty title in the IMDb dump.
turkish_names_set = {name.lower() for name in turkish_names if name.strip()}

# One DataFrame of hits per chunk; they are concatenated in a later cell.
matching_records = []

# Rows per chunk (adjust based on your system's memory capacity)
chunksize = 100000  # You may adjust this number

# Stream 'title.akas.tsv.gz' in chunks instead of loading it all at once.
akas_reader = pd.read_csv(
    'imdb/title.akas.tsv.gz',
    sep='\t',
    dtype=str,
    # Only these two columns are used below; skipping the rest saves memory and time.
    usecols=['titleId', 'title'],
    # IMDb marks missing values with a literal \N. Turning off pandas' default NA
    # strings keeps real titles such as "NA", "None" or "null" from being read as missing.
    na_values='\\N',
    keep_default_na=False,
    # The IMDb dumps are plain TSV with unescaped double quotes inside titles; with the
    # default quoting, a stray '"' can swallow the following fields or rows.
    quoting=csv.QUOTE_NONE,
    compression='gzip',
    chunksize=chunksize,
    encoding='utf-8',
)

for chunk in akas_reader:
    # Missing titles stay NaN after .str.lower(), and NaN is never in the lookup set,
    # so no fill value is needed.
    title_lower = chunk['title'].str.lower()

    # Keep only the rows whose lower-cased title is one of the Turkish names.
    matches = chunk.loc[title_lower.isin(turkish_names_set), ['titleId', 'title']]

    matching_records.append(matches)


In [14]:
# Stack the per-chunk hits into one frame, then keep only the first copy of
# each fully identical row.
stacked_matches = pd.concat(matching_records)
matching_akas_df = stacked_matches.drop_duplicates()


In [15]:
# Relabel the IMDb columns: 'titleId' becomes 'IMDb ID' and 'title' becomes
# 'Original Title'.
akas_column_labels = {'titleId': 'IMDb ID', 'title': 'Original Title'}
matching_akas_df.rename(columns=akas_column_labels, inplace=True)

matching_akas_df.head()

Unnamed: 0,IMDb ID From Turkish,Original Title
167216,tt0030210,Halka
282362,tt0042906,Halka
304923,tt0045024,Merhamet
345687,tt0048685,Son
530543,tt0065039,Bir Ask Hikayesi


In [16]:
# Keep one IMDb candidate per exact title spelling (the first occurrence wins).
# NOTE(review): titles are not unique on IMDb, so for common names the retained
# ID is arbitrary and may belong to an unrelated title - spot-check the matches.
matching_akas_df = matching_akas_df.drop_duplicates(subset=['Original Title'])

# Case-folded fallback lookup. The scan that built matching_akas_df compared
# lower-cased titles, so a hit may differ from the drama's title only by case;
# an exact-match merge alone would silently drop those hits.
imdb_id_by_folded_title = (
    matching_akas_df
    .assign(title_key=matching_akas_df['Original Title'].str.lower())
    .drop_duplicates(subset=['title_key'])
    .set_index('title_key')['IMDb ID']
)

# Remove columns left behind by an earlier run of this cell (or of the later
# rename cell). Without this, re-running produces 'IMDb ID_x' / 'IMDb ID_y'
# or duplicate column names, and the URL step below fails.
stale_columns = ['IMDb ID', 'IMDb ID from Turkish', 'IMDb URL from Turkish']
df_turkish_drama = df_turkish_drama.drop(columns=stale_columns, errors='ignore')

# Exact-title merge first; validate= raises if the right side is not unique on
# the key, which would otherwise duplicate drama rows.
df_turkish_drama = df_turkish_drama.merge(
    matching_akas_df, on='Original Title', how='left', validate='many_to_one'
)

# Fill the remaining gaps via the case-folded key. Missing titles are kept as
# NaN so they can never pick up an ID.
drama_titles = df_turkish_drama['Original Title']
folded_titles = drama_titles.astype(str).str.lower().where(drama_titles.notna())
df_turkish_drama['IMDb ID'] = df_turkish_drama['IMDb ID'].fillna(
    folded_titles.map(imdb_id_by_folded_title)
)

# Construct IMDb URLs (rows without an ID stay NaN)
df_turkish_drama['IMDb URL from Turkish'] = 'https://www.imdb.com/title/' + df_turkish_drama['IMDb ID'] + '/'


In [19]:
# Rename the ID column so it follows the same "... from Turkish" naming as the
# 'IMDb URL from Turkish' column, then preview the result.
imdb_id_rename = {'IMDb ID': 'IMDb ID from Turkish'}
df_turkish_drama.rename(columns=imdb_id_rename, inplace=True)
df_turkish_drama.head()

Unnamed: 0,URL,Original Title,English Title,Genre,Episodes,Broadcast Network,Broadcast Start Date,Broadcast End Date,Production Company,Director,Screen Writer,Synopsis,IMDb ID from Turkish,IMDb URL from Turkish,IMDb ID From Turkish
0,https://www.turkishdrama.com/the-tailor-terzi-...,Terzi,The Tailor,"Drama, Romance",23.0,Netflix,2-May-23,3-Nov-23,OGM Pictures,Cem Karci,,Peyami Dokumaci (Cagatay Ulusoy) is a young an...,tt13317230,https://www.imdb.com/title/tt13317230/,tt13317230
1,https://www.turkishdrama.com/sapphire-safir-tv...,Safir,Sapphire,"Romance, Drama",17.0,Atv,4-Sep-23,Present,NTC Medya,Semih Bagci,,"Gulsoy family is a well-known, wealthy family ...",tt0821803,https://www.imdb.com/title/tt0821803/,tt0821803
2,https://www.turkishdrama.com/omer-tv-series.html,Ömer,Omer,"Drama, Family",34.0,Star TV,9-Jan-23,present,OGM Pictures,Cem Karci,"Gulizar Irmak, Deniz Madanoglu, Sedef Bayburtl...",Omer (Selahattin Pasali) is a young guy in his...,tt22719788,https://www.imdb.com/title/tt22719788/,tt22719788
3,https://www.turkishdrama.com/forevermore-verme...,Vermem Seni Ellere,Forevermore,"Romance, Drama",9.0,Atv,18-Jun-23,13-Aug-23,AKN Film,Ali Balci,"Sehrazat Tunus Tasci, Damla Gucer, Samed Aslan...",Mehmet (Emre Bey) is a young guy who comes fro...,tt27739128,https://www.imdb.com/title/tt27739128/,tt27739128
4,https://www.turkishdrama.com/queen-kralice-tv-...,Kralice,Queen,"Drama, Romance",11.0,Kanal D,6-Apr-23,7-Jun-23,Mednova,"Cevdet Mercan, Serhan Sahin","Serdar Soydan, Kerem Bozok, Ekin Akcay, Nil Gu...",Deniz (Burcu Ozberk) and Ates (Gokhan Alkan) a...,,,


In [20]:
# Persist the final frame as CSV, leaving the row index out of the file.
output_csv_path = 'turkish_drama_data_with_imdb.csv'
df_turkish_drama.to_csv(output_csv_path, index=False)