In [15]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

import model_implementation_pipeline as mip
import features_transformation_pipeline as ftp
from scipy.sparse import hstack

import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1.Creating Inference data from existing raw dataset

In [16]:
project_data = pd.read_csv('/Users/shubhamshivendra/workspace/Project/Donors Choose/Data Sets/train_data.csv', nrows=60000)
resource_data = pd.read_csv('/Users/shubhamshivendra/workspace/Project/Donors Choose/Data Sets/resources.csv')

In [17]:
resource_data.columns

Index(['id', 'description', 'quantity', 'price'], dtype='object')

In [18]:
project_data.columns

Index(['Unnamed: 0', 'id', 'teacher_id', 'teacher_prefix', 'school_state',
       'project_submitted_datetime', 'project_grade_category',
       'project_subject_categories', 'project_subject_subcategories',
       'project_title', 'project_essay_1', 'project_essay_2',
       'project_essay_3', 'project_essay_4', 'project_resource_summary',
       'teacher_number_of_previously_posted_projects', 'project_is_approved'],
      dtype='object')

In [21]:
new_data = project_data[project_data['id'] == 'p001713']

In [9]:
# dropping target variable (in generalized we don't have target variable)
project_data = project_data.drop('project_is_approved', axis = 1)

In [10]:
generalized_data = project_data.iloc[59970:59999]

In [11]:
generalized_data.shape

(29, 16)

In [12]:
def feature_transform(data):
    scaler = pickle.load(open('/Users/shubhamshivendra/workspace/Project/Donors Choose/Data Sets/scaler.pkl', 'rb'))
    num = scaler.transform(data[scaler.feature_names_in_])
    ohe = pickle.load(open('/Users/shubhamshivendra/workspace/Project/Donors Choose/Data Sets/ohe.pkl', 'rb'))
    cat = ohe.transform(data[ohe.feature_names_in_])
    essay_vectorize = ftp.avg_word_2vec(data)
    data_new = hstack((num, cat, essay_vectorize)).tocsr()
    return data_new

In [13]:
def predict_score_inf(project_data, resource_data):
    new_data = ftp.preprocess_data(project_data, resource_data)
    df = feature_transform(new_data)
    log = pickle.load(open('/Users/shubhamshivendra/workspace/Project/Donors Choose/Data Sets/LogisticRegression.pkl', 'rb'))
    y_train_pred = mip.batch_pred(log, df)
    final = mip.predict_with_best_t(y_train_pred, 0.8347440976017594)
    return final

In [22]:
predict_score_inf(new_data, resource_data)

[1]

In [23]:
new_data.dtypes

Unnamed: 0                                       int64
id                                              object
teacher_id                                      object
teacher_prefix                                  object
school_state                                    object
project_submitted_datetime                      object
project_grade_category                          object
project_subject_categories                      object
project_subject_subcategories                   object
project_title                                   object
project_essay_1                                 object
project_essay_2                                 object
project_essay_3                                 object
project_essay_4                                 object
project_resource_summary                        object
teacher_number_of_previously_posted_projects     int64
project_is_approved                              int64
dtype: object

In [14]:
predicted_df = pd.DataFrame([predict_score_inf(generalized_data, resource_data)]).T
predicted_df.columns = ['Predicted']
predicted_df

Unnamed: 0,Predicted
0,1
1,0
2,1
3,1
4,0
5,0
6,1
7,1
8,1
9,0


In [105]:
print(generalized_data.shape)
print(predicted_df.shape)

(29, 16)
(29, 1)


In [114]:
# Appending the dataframe generalized data
final =  pd.concat([generalized_data.reset_index(), predicted_df], axis=1, ignore_index= True)

In [115]:
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,59970,14151,p189783,b9c26df385fddfca0e5ea24c9af0f6b7,Ms.,NY,2016-12-29 14:38:10,Grades 6-8,Music & The Arts,Performing Arts,Middle School Drum Circle,Our school is populated with children from all...,The drum is a perfect instrument for middle sc...,,,My students need 10 djembes to be able to part...,13,1
1,59971,150029,p033189,bd2049000d07f47146451280b21b7917,Mrs.,NJ,2017-01-20 07:21:25,Grades PreK-2,"Literacy & Language, Applied Learning","ESL, Early Development",A Little Drama in Our Lives!,My class is made up of 15 children and 3-4 stu...,The furniture that I have chosen will help pro...,,,My students need dramatic play furniture to he...,5,0
2,59972,55124,p107197,6ce9df9062b388c29cca6a6ee68bd7bc,Mrs.,NY,2016-08-08 16:32:20,Grades 3-5,"Math & Science, Music & The Arts","Environmental Science, Visual Arts",Pretty Paper Projects!!,\r\nMy students are very poor students who str...,This project will allow my students to create ...,,,My students need large colored paper rolls for...,5,1
3,59973,35974,p179016,c96d79def0bba46c7ba963602cb7e773,Ms.,CA,2016-09-14 21:22:47,Grades 6-8,"History & Civics, Literacy & Language","History & Geography, Literature & Writing",Pen to Paper Makes for Prepared Scholars!,My phenomenal students come from diverse back...,These materials will allow my students to have...,,,My students need basic supplies to begin the n...,3,1
4,59974,103193,p206960,546cf86b33fc9d7d002a97a462afc9f7,Mr.,CA,2016-08-30 12:26:41,Grades 6-8,"Math & Science, History & Civics","Health & Life Science, Social Sciences",Engaging Native American youth with technology...,These are all Native American students from un...,Our students are consistently asking for bette...,,,My students need technology (tablets and a pro...,0,0
5,59975,129981,p151636,2374ad2b2caa504cfd586c27723106ca,Mrs.,MD,2016-06-24 12:03:04,Grades PreK-2,"Applied Learning, Math & Science","College & Career Prep, Mathematics",Learning Beyond Our City,Our school is faced with a tight spending budg...,Having these laptops and tablet will allow us ...,,,My students need new technology to prepare the...,3,0
6,59976,131219,p014597,a018433a7d294d926ee9cefd177f5069,Ms.,IL,2016-06-21 11:05:54,Grades 6-8,"Literacy & Language, History & Civics","Literature & Writing, Social Sciences",Helping 6th Graders Become Informed Citizens,Sixth grade is a year of many changes for stud...,This donation will provide students with a res...,,,My students need to learn more about current e...,0,1
7,59977,121798,p239818,e88140b2ef27cfac193612585282dcb5,Mrs.,ND,2016-08-29 09:53:05,Grades 3-5,Music & The Arts,Music,Tubano Drums for Longfellow Elementary,I teach first through fifth grade almost all o...,Every year in music class we spend time workin...,,,My students need Remo Tubano Drums so they can...,0,1
8,59978,176693,p003832,98687bf0c7adeabcc749ec92702497b4,Mrs.,IL,2016-11-29 14:58:16,Grades 3-5,Math & Science,Applied Sciences,Generation Z,School-wide ninety-four percent of our student...,"Beginning Computer Science Week, using a Googl...",,,My students need a 3doodler Start Full Educati...,4,1
9,59979,58729,p056491,22656e540653abc605910daca1548065,Ms.,MN,2017-02-22 21:55:11,Grades 3-5,"Math & Science, History & Civics","Mathematics, Social Sciences",Flexible Seating Table,These students are the most thoughtful and giv...,Flexible seating is motivating for students! I...,,,My students need flexible seating to collabora...,1,0
