In [1]:
import os
import re
from getpass import getpass
from urllib.parse import urlparse

import pandas as pd

In [2]:
def extract_title_from_url_expert(url):
    """Derive a title from the last path segment of a review URL.

    The last non-empty path segment (the slug) has its hyphens replaced by
    spaces and is then title-cased, e.g.
    ``https://www.metacritic.com/movie/bronson`` -> ``'Bronson'``.

    Parameters
    ----------
    url : str
        Absolute or relative URL of the reviewed movie.

    Returns
    -------
    str
        The title-cased slug, or ``''`` when the URL has no path segment.
    """
    path = urlparse(url).path
    # Strip trailing slashes first: '.../movie/bronson/' would otherwise split
    # into a final empty segment and yield an empty title.
    slug = path.rstrip('/').split('/')[-1]
    return slug.replace('-', ' ').title()
# Expert reviews: tab-separated text file, Latin-1 encoded.
df_expert = pd.read_csv(
    'ExpertReviewsClean43LIWC.txt',
    sep='\t',
    encoding='ISO-8859-1',
)

# Movie table (Jayy's CSV): the delimiter is sniffed by the python engine.
df_MovieTable = pd.read_csv(
    'allMoviesWithUniqueID.csv',
    sep=None,
    engine='python',
    encoding='ISO-8859-1',
)

# The CSV calls the key column "ID"; it is renamed to "Movie_ID" here.
df_MovieTable = df_MovieTable.rename(columns={"ID": "Movie_ID"})

# Derive a lowercase title for every review from its URL.
url_titles = df_expert['url'].apply(extract_title_from_url_expert)
df_expert['title'] = url_titles.str.lower()


In [3]:
# Number the reviews 1..N in their current row order.
n_reviews = len(df_expert)
df_expert['ExpertReview_ID'] = range(1, n_reviews + 1)

In [4]:
# Title cleaning adapted from Jayy's code, so that both tables are normalized
# the same way before they are joined.
def clean_text(text):
    """Normalize a title for use as a join key.

    Missing values become ``''``. Otherwise the text is lowercased and every
    character that is not a lowercase letter, a digit or whitespace is
    replaced by a single space (characters are replaced, not removed, so
    runs of spaces can remain).

    Parameters
    ----------
    text : str or NaN
        Raw title.

    Returns
    -------
    str
        The normalized title.
    """
    if pd.isna(text):  # NaN / None -> empty key
        return ''
    # Lowercase first: the pattern below only whitelists a-z, so without this
    # every uppercase letter would be turned into a space ('Bronson' -> ' ronson').
    text = str(text).lower()
    # Replace anything that is not a lowercase letter, digit or whitespace with a space.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    return text

# Build the join key on both tables with the same cleaning function.
df_expert['normalized_title'] = df_expert['title'].apply(clean_text)
df_MovieTable['normalized_title'] = df_MovieTable['TITLE'].apply(clean_text)

# Keep exactly one Movie_ID per normalized title. If two movies share a
# normalized title, a plain left merge would emit one row per match and thereby
# duplicate ExpertReview_ID values. keep='last' is chosen to mirror the upsert
# used when loading the database, where the last row written for a key wins.
# NOTE(review): ambiguous titles are resolved arbitrarily here - confirm this
# is acceptable for the analysis.
movie_lookup = df_MovieTable[['normalized_title', 'Movie_ID']].drop_duplicates(
    subset='normalized_title', keep='last'
)

# Drop a Movie_ID column left over from an earlier run of this cell; otherwise
# the merge would create Movie_ID_x / Movie_ID_y and later cells would fail.
df_expert = df_expert.drop(columns=['Movie_ID'], errors='ignore').merge(
    movie_lookup,
    on='normalized_title',
    how='left',
    validate='many_to_one',  # fail loudly if the lookup is not unique per title
)

print(df_expert.head())


                                        url  idvscore            reviewer  \
0  https://www.metacritic.com/movie/bronson     100.0    "Andrew O'Hehir"   
1  https://www.metacritic.com/movie/bronson      90.0        'A.O. Scott'   
2  https://www.metacritic.com/movie/bronson      90.0                None   
3  https://www.metacritic.com/movie/bronson      83.0       'Noel Murray'   
4  https://www.metacritic.com/movie/bronson      80.0   'Joshua Rothkopf'   

   dateP                                                Rev  WC  Analytic  \
0   None   'Bronson owes a little or a lot to Kubrick s ...  25     73.88   
1   None   'Bronson invites you to admire its protagonis...  30     13.07   
2   None   'Whether it s Peterson/Bronson s more theatri...  40     72.69   
3   None   'There are two Bronsons on display here: the ...  39     65.46   
4   None   'Refn has somehow found his way to an authent...  24     88.46   

   Clout  Authentic   Tone  ...  Exclam   Dash  Quote  Apostro  Parenth  \

In [5]:
# Project the merged frame onto the two column sets used further down:
# the review -> movie link and the per-review posemo/negemo values.
link_columns = ['Movie_ID', 'ExpertReview_ID']
review_columns = ['ExpertReview_ID', 'url', 'posemo', 'negemo']

df_Movie_Expert = df_expert[link_columns]
df_Expert_Review = df_expert[review_columns]

In [6]:
# Discard rows with a missing value (reviews that matched no movie have no
# Movie_ID after the left merge), then store both ID columns as integers.
df_Movie_Expert = df_Movie_Expert.dropna().astype(
    {'Movie_ID': int, 'ExpertReview_ID': int}
)

In [7]:
# Show the first five rows of the review -> movie link table.
print(df_Movie_Expert.head(5))

   Movie_ID  ExpertReview_ID
0     19419                1
1     19419                2
2     19419                3
3     19419                4
4     19419                5


In [8]:
# Write the review -> movie link table to disk, leaving out the DataFrame index.
df_Movie_Expert.to_csv(
    path_or_buf='Movie_Expert.csv',
    encoding='ISO-8859-1',
    index=False,
)

In [9]:
import psycopg2
from psycopg2 import sql

In [10]:
# Create (if necessary) and fill the expert_reviews table from df_Expert_Review.
conn = psycopg2.connect(
    dbname='DMGroupAssignment',
    user='postgres',
    # The password is no longer stored in the notebook: it is read from the
    # PGPASSWORD environment variable, with an interactive prompt as fallback.
    password=os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    host='localhost',
    port='5432'
)

# IF NOT EXISTS makes the cell re-runnable; the insert below is already an upsert.
create_table_query = """
CREATE TABLE IF NOT EXISTS expert_reviews (
    ExpertReview_ID INTEGER PRIMARY KEY,
    Posemo FLOAT NOT NULL,
    Negemo FLOAT NOT NULL
);
"""

insert_query = """
INSERT INTO expert_reviews (ExpertReview_ID, posemo, negemo)
VALUES ( %s, %s, %s)
ON CONFLICT (ExpertReview_ID) DO UPDATE
SET Posemo = EXCLUDED.Posemo,
    Negemo = EXCLUDED.Negemo;
"""

# Plain tuples in column order; the ID is cast to a built-in int because
# psycopg2 cannot adapt numpy integer scalars.
review_rows = [
    (int(review_id), posemo, negemo)
    for review_id, posemo, negemo in df_Expert_Review[
        ['ExpertReview_ID', 'posemo', 'negemo']
    ].itertuples(index=False, name=None)
]

try:
    # `with conn` commits on success and rolls back if anything raises.
    with conn:
        with conn.cursor() as cursor:
            cursor.execute(create_table_query)
            cursor.executemany(insert_query, review_rows)
finally:
    # `with conn` does not close the connection, so do it explicitly.
    conn.close()

In [11]:
# Create (if necessary) and fill the Movie_Expert link table from df_Movie_Expert.
conn = psycopg2.connect(
    dbname='DMGroupAssignment',
    user='postgres',
    # The password is no longer stored in the notebook: it is read from the
    # PGPASSWORD environment variable, with an interactive prompt as fallback.
    password=os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    host='localhost',
    port='5432'
)

# IF NOT EXISTS makes the cell re-runnable; the insert below is already an upsert.
create_table_query = """
CREATE TABLE IF NOT EXISTS Movie_Expert (
    ExpertReview_ID INTEGER PRIMARY KEY,
    Movie_ID INTEGER NOT NULL
);
"""

insert_query = """
INSERT INTO Movie_Expert (ExpertReview_ID, Movie_ID)
VALUES ( %s, %s)
ON CONFLICT (ExpertReview_ID) DO UPDATE
SET Movie_ID = EXCLUDED.Movie_ID;
"""

# Cast to built-in ints: psycopg2 cannot adapt numpy integer scalars.
link_rows = [
    (int(review_id), int(movie_id))
    for review_id, movie_id in df_Movie_Expert[
        ['ExpertReview_ID', 'Movie_ID']
    ].itertuples(index=False, name=None)
]

try:
    # `with conn` commits on success and rolls back if anything raises.
    with conn:
        with conn.cursor() as cursor:
            cursor.execute(create_table_query)
            cursor.executemany(insert_query, link_rows)
finally:
    # `with conn` does not close the connection, so do it explicitly.
    conn.close()

In [13]:
# Create (if necessary) and fill the Movie_Table table from df_MovieTable.
conn = psycopg2.connect(
    dbname='DMGroupAssignment',
    user='postgres',
    # The password is no longer stored in the notebook: it is read from the
    # PGPASSWORD environment variable, with an interactive prompt as fallback.
    password=os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    host='localhost',
    port='5432'
)

# IF NOT EXISTS makes the cell re-runnable; the insert below is already an upsert.
create_table_query = """
CREATE TABLE IF NOT EXISTS Movie_Table (
    Movie_ID INTEGER PRIMARY KEY,
    Title TEXT NOT NULL
);
"""

insert_query = """
INSERT INTO Movie_Table (Movie_ID, Title)
VALUES ( %s, %s)
ON CONFLICT (Movie_ID) DO UPDATE
SET Title = EXCLUDED.Title;
"""

# The ID is cast to a built-in int (as the original loop did); the title is
# passed through unchanged.
movie_rows = [
    (int(movie_id), title)
    for movie_id, title in df_MovieTable[
        ['Movie_ID', 'TITLE']
    ].itertuples(index=False, name=None)
]

try:
    # `with conn` commits on success and rolls back if anything raises.
    with conn:
        with conn.cursor() as cursor:
            cursor.execute(create_table_query)
            cursor.executemany(insert_query, movie_rows)
finally:
    # `with conn` does not close the connection, so do it explicitly.
    conn.close()