In [47]:
import numpy as np 
import string
import re
from nltk.corpus import stopwords
import pandas as pd 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [48]:
movie_data= pd.read_csv('full_movie_data.csv')

In [49]:
movie_data['Summary'] = movie_data['Summary'].astype(str)

In [50]:
def clean_text(text):
    text = re.sub('[^a-zA-Z0-9]', ' ', text) # Replace all non-alphanumeric characters with a space
    return text.lower() # Convert the text to lowercase

movie_data['Clean Summary'] = movie_data['Summary'].apply(clean_text)

In [51]:
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def remove_stop_words(text):
    words = text.split()
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

movie_data['Clean Summary'] = movie_data['Clean Summary'].apply(remove_stop_words)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/regisacosta/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [52]:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

def stem_words(text):
    words = text.split()
    stemmed_words = [stemmer.stem(word) for word in words]
    return ' '.join(stemmed_words)

movie_data['Clean Summary'] = movie_data['Clean Summary'].apply(stem_words)

In [53]:
movie_data

Unnamed: 0,Title,Year,Summary,Short Summary,Genres,Director,Writers,Cast,Clean Summary
0,Patton Oswalt: Annihilation,2017,"Patton Oswald, despite a personal tragedy, pro...","Patton Oswalt, despite a personal tragedy, pro...",Uncategorized,Bobcat Goldthwait,Patton Oswalt,Patton Oswalt,patton oswald despit person tragedi produc bes...
1,New York Doll,2005,A recovering alcoholic and recently converted ...,A recovering alcoholic and recently converted ...,Documentary|Music,Greg Whiteley,Arthur Kane,Sylvain Sylvain,recov alcohol recent convert mormon arthur kil...
2,Mickey's Magical Christmas: Snowed in at the H...,2001,After everyone is snowed in at the House of Mo...,Mickey and all his friends hold their own Chri...,Adventure|Animation|Comedy|Family|Fantasy,Tony Craig,Thomas Hart,Carlos Alazraqui|Wayne Allwine,everyon snow hous mous mickey suggest throw ch...
3,Mickey's House of Villains,2001,The villains from the popular animated Disney ...,The villains from the popular animated Disney ...,Animation|Comedy|Family|Fantasy|Horror,Jamie Mitchell,Thomas Hart,Tony Anselmo|Wayne Allwine,villain popular anim disney film gather hous m...
4,And Then I Go,2017,"In the cruel world of junior high, Edwin suffe...","In the cruel world of junior high, Edwin suffe...",Drama,Vincent Grashaw,Brett Haley,Arman Darbo|Sawyer Barth,cruel world junior high edwin suffer state anx...
...,...,...,...,...,...,...,...,...,...
3935,Skyscraper,2018,FBI Hostage Rescue Team leader and U.S. war ve...,A security expert must infiltrate a burning sk...,Action|Thriller,Rawson Marshall Thurber,Rawson Marshall Thurber,Chin Han|Dwayne Johnson|Neve Campbell,fbi hostag rescu team leader u war veteran saw...
3936,Trench 11,2017,"In the final days of WWI, an allied army unit ...","In the final days of WWI, an allied army unit ...",Horror|Thriller|War,Leo Scherman,Matt Booi,Charlie Carrick|Robert Stadlober|Rossif Suther...,final day wwi alli armi unit led shell shock s...
3937,My Daddy's in Heaven,2017,"Becca, Adam and their 5-year-old daughter Acie...","Becca, Adam and their 5-year-old daughter Acie...",Comedy|Drama|Family,Waymon Boone,Joseph Nasser,Corbin Bernsen|Jenn Gotzon Chandler|Lee Benton,becca adam 5 year old daughter aci perfect fam...
3938,Keeping Up with the Steins,2006,"In toney Brentwood, Benjamin Fiedler prepares ...",A 13-year-old boy uses his upcoming bar mitzva...,Comedy,Scott Marshall,Mark Zakarin,Daryl Sabara|Garry Marshall|Jeremy Piven,toney brentwood benjamin fiedler prepar bar mi...


In [54]:
# Prepare the data for training and testing
X = movie_data['Clean Summary'] # input features
y = movie_data['Title'] # target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [55]:
# Vectorize the text data
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

In [56]:
# Train a random forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [60]:
user_input = input("Enter a movie description: ")
clean_input = clean_text(user_input) # assuming you have defined the clean_text function as shown earlier

Enter a movie description: Patton Oswald, despite a personal tragedy, produces his best standup yet.


In [61]:
# Vectorize the user input
input_vec = vectorizer.transform([clean_input])

# Predict the movie title for the user input
predicted_title = rf_model.predict(input_vec)[0]
print("Predicted movie title: ", predicted_title)

Predicted movie title:  Patton Oswalt: Annihilation


In [64]:
import flask

app = flask.Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
    if flask.request.method == 'POST':
        # Get the user input from the form
        user_input = flask.request.form['description']

        # Clean and vectorize the user input
        clean_input = clean_text(user_input)
        input_vec = vectorizer.transform([clean_input])

        # Predict the movie title for the user input
        predicted_title = rf_model.predict(input_vec)[0]

        return flask.render_template('moviebot.html', prediction=predicted_title)
    
    return flask.render_template('moviebot.html')

if __name__ == '__main__':
    app.run(debug=True, port=8000)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on http://127.0.0.1:8000/ (Press CTRL+C to quit)
 * Restarting with fsevents reloader


SystemExit: 1