In [55]:
# Bootstrap Django before any project models are imported.
# NOTE(review): presumably init() configures settings and calls django.setup() — confirm in setup_django.
import setup_django
setup_django.init()


In [56]:
from django.conf import settings
from ratings.models import Rating

# Location of the links CSV inside the project's configured data directory.
LINKS_SMALL_CSV = settings.DATA_DIR / "links_small.csv"

# Sanity check: display whether the file is present before trying to read it.
LINKS_SMALL_CSV.exists()

True

In [4]:
# Attempted shortcut: ask the database for ratings whose target object is missing.
# This raises FieldError because content_object cannot be used in a queryset
# lookup (it appears to be a GenericForeignKey), so the check is done in Python instead.
qs = Rating.objects.all().filter(content_object__isnull=True)

FieldError: Field 'content_object' does not generate an automatic reverse relation and therefore cannot be used for reverse querying. If it is a GenericForeignKey, consider adding a GenericRelation.

In [57]:
# Every rating, unfiltered; whether its target object exists is checked per instance.
# NOTE(review): reading content_object row by row may issue one query per rating —
# consider prefetch_related("content_object") after confirming it suits this model.
qs = Rating.objects.all()

In [58]:
# Gather the object_id of each rating whose content_object resolves to None.
missing_movie_ids = [
    rating.object_id
    for rating in qs
    if rating.content_object is None
]

# _total counts every orphaned rating; total_missing keeps each id only once.
_total = len(missing_movie_ids)
total_missing = list(set(missing_movie_ids))
print(len(total_missing), _total)

370 1861


In [14]:
import pandas as pd

In [59]:
# Load the links file; the preview shows the columns movieId, imdbId and tmdbId.
links_df = pd.read_csv(LINKS_SMALL_CSV)
links_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [60]:
# Keep only the link rows whose movieId is one of the missing ids, as an
# independent frame so later column additions never touch links_df.
is_missing = links_df['movieId'].isin(total_missing)
ms_df = links_df[is_missing].copy()
ms_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
607,720,118114,
608,721,114103,
736,915,47437,6620.0
910,1133,111357,
1088,1344,55824,11349.0


In [61]:
# (rows, columns) of the filtered links frame.
ms_df.shape

(370, 3)

In [62]:
# Every missing id should have exactly one row in the links file.
len(ms_df) == len(total_missing)

True

In [63]:
def enrich_imdb_col(val):
    """Convert a bare numeric IMDb id into its ``tt``-prefixed form.

    The digits are left-padded with zeros to at least seven characters and
    prefixed with ``tt`` (``114709`` -> ``"tt0114709"``). Ids that already
    have more than seven digits are prefixed without any padding.

    Args:
        val: The id as an int, an integer-valued float, or a string.

    Returns:
        str: The ``tt``-prefixed id, or ``str(val)`` unchanged when the value
        is not made purely of ASCII digits (already prefixed, NaN, ...).
    """
    # A numeric column that contains missing values is read as float;
    # drop the trailing ".0" so the digits can be padded normally.
    if isinstance(val, float) and val.is_integer():
        val = int(val)
    val = str(val)
    if val.isascii() and val.isdigit():
        # zfill pads short ids of any length and leaves 7+ digit ids intact.
        return f"tt{val.zfill(7)}"
    return val
        

In [64]:
# Derive the tt-prefixed IMDb id for every row and attach it as column 'tt'.
ms_df = ms_df.assign(tt=ms_df['imdbId'].apply(enrich_imdb_col))

ms_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt
607,720,118114,,tt0118114
608,721,114103,,tt0114103
736,915,47437,6620.0,tt0047437
910,1133,111357,,tt0111357
1088,1344,55824,11349.0,tt0055824


In [65]:
# Location of the movies metadata CSV inside the project's configured data directory.
MOVIES_CSV = settings.DATA_DIR / "movies_metadata.csv"

# Sanity check: display whether the file is present before trying to read it.
MOVIES_CSV.exists()

True

In [66]:
# Only these columns are read from the metadata file; imdb_id is the join key
# used against the 'tt' column built from the links file.
movies_cols = ['title', 'overview', 'release_date', 'imdb_id']
movies_df = pd.read_csv(MOVIES_CSV, usecols=movies_cols)
movies_df.head()

Unnamed: 0,imdb_id,overview,release_date,title
0,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story
1,tt0113497,When siblings Judy and Peter discover an encha...,1995-12-15,Jumanji
2,tt0113228,A family wedding reignites the ancient feud be...,1995-12-22,Grumpier Old Men
3,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale
4,tt0113041,Just when George Banks has recovered from his ...,1995-02-10,Father of the Bride Part II


In [67]:
# Inner join: keep only missing movies whose tt id also appears in the metadata.
missing_movies_df = pd.merge(
    ms_df,
    movies_df,
    how='inner',
    left_on='tt',
    right_on='imdb_id',
)
missing_movies_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title
0,915,47437,6620.0,tt0047437,tt0047437,Linus and David Larrabee are the two sons of a...,1954-09-28,Sabrina
1,1344,55824,11349.0,tt0055824,tt0055824,Sam Bowden witnesses a rape committed by Max C...,1962-04-12,Cape Fear
2,2135,61584,16081.0,tt0061584,tt0061584,Get ready for the wildest adventure of a lifet...,1967-12-19,Doctor Dolittle
3,2136,57372,18331.0,tt0057372,tt0057372,"Jerry Lewis directed, co-wrote and starred in ...",1963-06-04,The Nutty Professor
4,2367,74751,10730.0,tt0074751,tt0074751,In this remake of the 1933 classic about the g...,1976-09-08,King Kong


In [68]:
# tmdbId is a float column that can contain NaN (see the links preview), and
# int(NaN) raises ValueError. Rows without a tmdbId have no alternate id to
# look up, so drop them before converting.
missing_movies_df = missing_movies_df.dropna(subset=['tmdbId'])

# id     -> the movieId the ratings refer to
# id_alt -> the tmdbId rendered as an integer string (6620.0 -> "6620")
missing_movies_df = missing_movies_df.assign(
    id=missing_movies_df['movieId'],
    id_alt=missing_movies_df['tmdbId'].astype('int64').astype(str),
)
missing_movies_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title,id,id_alt
0,915,47437,6620.0,tt0047437,tt0047437,Linus and David Larrabee are the two sons of a...,1954-09-28,Sabrina,915,6620
1,1344,55824,11349.0,tt0055824,tt0055824,Sam Bowden witnesses a rape committed by Max C...,1962-04-12,Cape Fear,1344,11349
2,2135,61584,16081.0,tt0061584,tt0061584,Get ready for the wildest adventure of a lifet...,1967-12-19,Doctor Dolittle,2135,16081
3,2136,57372,18331.0,tt0057372,tt0057372,"Jerry Lewis directed, co-wrote and starred in ...",1963-06-04,The Nutty Professor,2136,18331
4,2367,74751,10730.0,tt0074751,tt0074751,In this remake of the 1933 classic about the g...,1976-09-08,King Kong,2367,10730


In [69]:
# Narrow to the three columns needed for the re-id step, as an independent frame.
wanted_cols = ['id', 'id_alt', 'title']
final_df = missing_movies_df[wanted_cols].copy()
final_df.head()

Unnamed: 0,id,id_alt,title
0,915,6620,Sabrina
1,1344,11349,Cape Fear
2,2135,16081,Doctor Dolittle
3,2136,18331,The Nutty Professor
4,2367,10730,King Kong


In [70]:
# Alternate ids as a plain Python list; the values are strings (id_alt was built with str()).
alt_id_list = final_df['id_alt'].to_list()

In [71]:
from movies.models import Movie

In [72]:
# Movies currently stored under a primary key that appears in alt_id_list.
movies_qs = Movie.objects.filter(id__in=alt_id_list)
# Display how many such movies exist.
movies_qs.count()

39

In [73]:
from django.forms.models import model_to_dict

In [74]:
# Dry run on the first movie only: print its stored fields when exactly one
# candidate row carries its id as id_alt.
for obj in movies_qs[:1]:
    data = final_df[final_df['id_alt'] == str(obj.id)]
    if len(data) != 1:
        continue
    og_model_data = model_to_dict(obj)
    print(og_model_data)

{'id': 161, 'title': 'Crimson Tide', 'overview': 'On a US nuclear missile sub, a young first officer stages a mutiny to prevent his trigger happy captain from launching his missiles before confirming his orders to do so.', 'release_date': datetime.date(1995, 5, 12), 'rating_last_updated': datetime.datetime(2023, 3, 29, 7, 55, 52, 998658, tzinfo=datetime.timezone.utc), 'rating_count': 80, 'rating_avg': Decimal('3.88')}


In [46]:
# Dry run on the first movie only: print the stored fields next to the
# candidate replacement row so the two can be compared by eye.
for obj in movies_qs[:1]:
    data = final_df[final_df['id_alt'] == str(obj.id)]
    if len(data) != 1:
        continue
    og_model_data = model_to_dict(obj)
    update_data = data.to_dict(orient='records')[0]
    print(og_model_data)
    print(update_data)

{'id': 2, 'title': 'Ariel', 'overview': "Taisto Kasurinen is a Finnish coal miner whose father has just committed suicide and who is framed for a crime he did not commit. In jail, he starts to dream about leaving the country and starting a new life. He escapes from prison but things don't go as planned...", 'release_date': datetime.date(1988, 10, 21), 'rating_last_updated': datetime.datetime(2023, 3, 29, 6, 21, 3, 825242, tzinfo=datetime.timezone.utc), 'rating_count': 107, 'rating_avg': Decimal('3.49')}
{'id': 4470, 'id_alt': '2', 'title': 'Ariel'}


In [75]:
from django.db import transaction

# Trial run on the first movie only: re-create it under the id taken from
# final_df when exactly one candidate row matches and the titles agree.
for obj in movies_qs[:1]:
    data = final_df[final_df['id_alt'] == str(obj.id)]
    if data.shape[0] != 1:
        continue
    update_data = data.to_dict('records')[0]
    # Titles must agree, otherwise the shared number is a coincidence.
    if obj.title != update_data.get('title'):
        continue
    og_model_data = model_to_dict(obj)
    print(og_model_data)
    new_model_data = {**og_model_data, 'id': update_data['id']}
    print(new_model_data)
    # Delete and re-create inside one transaction: if create() fails (for
    # example because the new id is already taken) the delete is rolled back
    # instead of losing the movie.
    with transaction.atomic():
        obj.delete()
        Movie.objects.create(**new_model_data)


In [76]:
from django.db import transaction

# Re-query so movies already moved by an earlier run are not picked up again.
movies_qs = Movie.objects.filter(id__in=alt_id_list)

# For every movie stored under an alternate id, re-create it under the id
# taken from final_df when exactly one candidate row matches and the titles agree.
for obj in movies_qs:
    # Boolean indexing already returns a new frame; no full copy is needed.
    data = final_df[final_df['id_alt'] == str(obj.id)]
    if data.shape[0] != 1:
        continue
    update_data = data.to_dict('records')[0]
    # Titles must agree, otherwise the shared number is a coincidence.
    if obj.title != update_data.get('title'):
        continue
    new_model_data = {**model_to_dict(obj), 'id': update_data['id']}
    # Delete and re-create inside one transaction: if create() fails (for
    # example because the new id is already taken) the delete is rolled back
    # instead of losing the movie.
    with transaction.atomic():
        obj.delete()
        Movie.objects.create(**new_model_data)
print("fin")


fin


In [54]:
# Re-run the project's rating update task now that movies have been re-created under new ids.
# NOTE(review): presumably this recomputes each movie's rating_count / rating_avg — confirm in ratings.tasks.
from ratings.tasks import task_update_movie_ratings
task_update_movie_ratings()

Rating update took 0:01:10 (70.60567855834961s)
