In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

In [2]:
# Load the review records from the IMDB_Movies_2021 sqlite database.
# Only the four columns used by the rest of the notebook are selected.

filepath = "Resources/IMDB_Movies_2021.db"
engine = create_engine("sqlite:///" + filepath)

reviews_query = "SELECT AUTHOR, TITLE, REVIEW, RATING FROM REVIEWS"
df = pd.read_sql(reviews_query, con=engine)
df

Unnamed: 0,AUTHOR,TITLE,REVIEW,RATING
0,margarida-44311,Not Bad\n,I don't get all the terrible reviews for this ...,5.0
1,joemay-2,What are all the bad reviews about is it a wo...,I cannot believe anyone could give this film l...,8.0
2,nebk,Great White=Jaws Lite\n,Great White is not the worst way to spend 90 m...,4.0
3,kuarinofu,Bare-bones killer shark film\n,Great White is as basic of a killer shark film...,4.0
4,Horror_Flick_Fanatic,"Terrible story, dialogue, and CGI\n","Terrible story, dialogue and CGI. The film has...",4.0
...,...,...,...,...
5445,suryajijvania,More Parts\n,"It's master piece by Zack please part 2,3,4 al...",10.0
5446,shishirkmr-82243,It's a fantastic movie\n,No words to describe. It's awesome. One of the...,10.0
5447,moizsyed-07601,Awesome out standing!\n,Far better than previous one and better editin...,10.0
5448,samun_shrestha,EPIC\n,Why did the studio say no to this masterpiece?...,10.0


In [3]:
# Keep only the rows in which every column holds a value
# (i.e. drop any row that has at least one missing entry).

has_all_fields = df.notna().all(axis=1)
# .copy() gives an independent frame, so later edits never touch `df`.
clean_df = df.loc[has_all_fields].copy()
clean_df

Unnamed: 0,AUTHOR,TITLE,REVIEW,RATING
0,margarida-44311,Not Bad\n,I don't get all the terrible reviews for this ...,5.0
1,joemay-2,What are all the bad reviews about is it a wo...,I cannot believe anyone could give this film l...,8.0
2,nebk,Great White=Jaws Lite\n,Great White is not the worst way to spend 90 m...,4.0
3,kuarinofu,Bare-bones killer shark film\n,Great White is as basic of a killer shark film...,4.0
4,Horror_Flick_Fanatic,"Terrible story, dialogue, and CGI\n","Terrible story, dialogue and CGI. The film has...",4.0
...,...,...,...,...
5445,suryajijvania,More Parts\n,"It's master piece by Zack please part 2,3,4 al...",10.0
5446,shishirkmr-82243,It's a fantastic movie\n,No words to describe. It's awesome. One of the...,10.0
5447,moizsyed-07601,Awesome out standing!\n,Far better than previous one and better editin...,10.0
5448,samun_shrestha,EPIC\n,Why did the studio say no to this masterpiece?...,10.0


In [4]:
# Remove the '\n' from the text in the TITLE and REVIEW columns.
#
# The cleaned columns are assigned back onto the frame instead of calling
# `.replace(..., inplace=True)` on `clean_df[col]`. That chained in-place
# form mutates a temporary column selection: pandas 2.x warns about it and,
# with Copy-on-Write enabled (the default from pandas 3), it leaves
# `clean_df` unchanged. Assigning the result is also safe to re-run.

text_columns = ['TITLE', 'REVIEW']
# regex=True makes the pattern match inside each string, not just whole values.
clean_df[text_columns] = clean_df[text_columns].replace('\n', '', regex=True)
clean_df

Unnamed: 0,AUTHOR,TITLE,REVIEW,RATING
0,margarida-44311,Not Bad,I don't get all the terrible reviews for this ...,5.0
1,joemay-2,What are all the bad reviews about is it a wo...,I cannot believe anyone could give this film l...,8.0
2,nebk,Great White=Jaws Lite,Great White is not the worst way to spend 90 m...,4.0
3,kuarinofu,Bare-bones killer shark film,Great White is as basic of a killer shark film...,4.0
4,Horror_Flick_Fanatic,"Terrible story, dialogue, and CGI","Terrible story, dialogue and CGI. The film has...",4.0
...,...,...,...,...
5445,suryajijvania,More Parts,"It's master piece by Zack please part 2,3,4 al...",10.0
5446,shishirkmr-82243,It's a fantastic movie,No words to describe. It's awesome. One of the...,10.0
5447,moizsyed-07601,Awesome out standing!,Far better than previous one and better editin...,10.0
5448,samun_shrestha,EPIC,Why did the studio say no to this masterpiece?...,10.0


In [5]:
# Open the sqlite database that will hold the cleaned data.
# NOTE: this rebinds `engine`, which until now pointed at the source
# IMDB_Movies_2021 database; from here on it refers to the clean-data db.

clean_db_url = 'sqlite:///Resources/clean_reviews_data.db'
# echo=True makes SQLAlchemy log every statement it emits to the cell output.
engine = create_engine(clean_db_url, echo=True)
sqlite_connection = engine.connect()

2021-08-25 15:26:53,625 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2021-08-25 15:26:53,633 INFO sqlalchemy.engine.base.Engine ()
2021-08-25 15:26:53,636 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2021-08-25 15:26:53,639 INFO sqlalchemy.engine.base.Engine ()


In [6]:
# Write clean_df into the new database as the "imdb_reviews" table.
# - if_exists='replace': an existing table of that name is replaced.
# - The DataFrame index is written too (it appears as the "index" column
#   in the CREATE TABLE statement logged below).

sqlite_table = "imdb_reviews"
clean_df.to_sql(
    name=sqlite_table,
    con=sqlite_connection,
    if_exists='replace',
)

2021-08-25 15:26:57,711 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("imdb_reviews")
2021-08-25 15:26:57,714 INFO sqlalchemy.engine.base.Engine ()
2021-08-25 15:26:57,720 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("imdb_reviews")
2021-08-25 15:26:57,722 INFO sqlalchemy.engine.base.Engine ()
2021-08-25 15:26:57,729 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE imdb_reviews (
	"index" BIGINT, 
	"AUTHOR" TEXT, 
	"TITLE" TEXT, 
	"REVIEW" TEXT, 
	"RATING" FLOAT
)


2021-08-25 15:26:57,731 INFO sqlalchemy.engine.base.Engine ()
2021-08-25 15:26:57,758 INFO sqlalchemy.engine.base.Engine COMMIT
2021-08-25 15:26:57,761 INFO sqlalchemy.engine.base.Engine CREATE INDEX ix_imdb_reviews_index ON imdb_reviews ("index")
2021-08-25 15:26:57,763 INFO sqlalchemy.engine.base.Engine ()
2021-08-25 15:26:57,774 INFO sqlalchemy.engine.base.Engine COMMIT
2021-08-25 15:26:57,787 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2021-08-25 15:26:57,848 INFO sqlalchemy.engine.base.

In [7]:
# Close connection to the db
sqlite_connection.close()

# close() only releases the connection back to the engine's pool; disposing
# the engine closes any pooled connections so the handle on the sqlite file
# is actually released once the notebook is done with it.
engine.dispose()