### Import Files and libraries ###

In [1]:
import pandas as pd
import dash_bootstrap_components as dbc
from flask import Flask
from jupyter_dash import JupyterDash
from dash_bootstrap_templates import load_figure_template
from pre_processor import PreProcessor, FeatureSelection
from dash_bootstrap_templates import load_figure_template
from dashboard import DashboardLayout, CallbackManager
from models import MachineLearningClassifier


### Data cleaning and pre processing ###

In [2]:
    movies = pd.read_csv('tmdb_reformatted_movies.csv')
    credit = pd.read_csv('tmdb_reformatted_credits.csv')
    data_frame = pd.merge(left=movies, right=credit, how='inner', on='id')
    pre_processor = PreProcessor(data_frame=data_frame)
    pre_processor.set_index('id')
    pre_processor.drop_na(axis=1, how='any')
    pre_processor.rename(columns={'title_x':'title'})
    
    pre_processor.data_frame.rename(columns={'company_name': 'Producer_Company', 'genre': 'Genre', 'cast': 'Cast'}
                                    , inplace=True)
    pre_processor.data_frame['release_date'] = pd.to_datetime(pre_processor.data_frame['release_date'],
                                                              format='%Y-%m-%d')
    pre_processor.data_frame['profit'] = pre_processor.data_frame['revenue'] - pre_processor.data_frame['budget']
    pre_processor.data_frame['profit'] = pre_processor.data_frame['profit'].apply(lambda x: 1 if x > 0 else 0)
    target = pre_processor.data_frame.pop('profit')
    data_frame = pre_processor.data_frame.copy()
    movies = pd.read_csv('tmdb_5000_movies.csv')

### Feature engineering ###

In [None]:
pre_processor.drop_columns(columns=['Country', 'release_date', 'title', 'title_y','department', 'job', 'name'])
pre_processor.normalize()
pre_processor.one_hot_encoder(columns=['Genre'])
feature_selector = FeatureSelection(pre_processor.data_frame)
features = feature_selector.data_frame.select_dtypes(include=['number']).columns
feature_selector.add_log_transform(columns=['budget'])
feature_selector.reduction_dimension_by_pca(scaled_columns=features)
feature_selector.calculate_mutual_inf_class(target=target, number_of_features=12)

 -0.47677409]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.data_frame.loc[:, columns] = data
 -0.75654337]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.data_frame.loc[:, columns] = data
 -0.87438696]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.data_frame.loc[:, columns] = data
 -0.12871766]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.data_frame.loc[:, columns] = data
 -0.47413975]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  self.data_frame.loc[:, columns] = data


normalizing_is_done
one hot encoding is done
log transform is done


### Train models and prepare default machine learning data frame  ###

In [None]:
data = list()
model = MachineLearningClassifier(data_frame=feature_selector.data_frame, target=target)
model.train_test_split()
model.fit_xgboost_classifier()
data.append(model.metrics())
model.fit_logistic_regression_classifier()
data.append(model.metrics())
model.fit_decision_tree_classifier()
data.append(model.metrics())
model.fit_random_forest_classifier()
data.append(model.metrics())
df = pd.DataFrame(data=data)
df

### Run Dash board for see result ###

In [None]:
load_figure_template('SLATE')
# print(pre_processor.data_frame)
if __name__ == "__main__":
    server = Flask(__name__)
    server.config.update(  
         broker_url='amqp://guest:guest@localhost//',
         result_backend='rpc://')
    app = JupyterDash(__name__, external_stylesheets=[dbc.themes.SLATE] , server=server)
    dashboard_layout = DashboardLayout(app, data_frame)
    callback_manager = CallbackManager(app, data_frame, machine_learning_data_frame=df, target=target, recommender_data=movies)
    app.run_server(debug=True, port=8080)