# Adding AI to Your App

This notebook demonstrates several approaches to leveraging artificial intelligence in your business.

In [None]:
!pip install -U scikit-learn
!pip install pandas

## Loading the Data

We use a popular, publicly available, labelled sentiment-analysis dataset to demonstrate the different approaches. The cells below expect the reviews as `.txt` files in the `data/pos` and `data/neg` directories.

In [None]:
# import required functions
from os.path import join
from os import listdir

# Root folder of the dataset; each class has its own sub-folder ("pos" / "neg").
data_dir = "data"
pos_data_dir, neg_data_dir = (
    join(data_dir, class_folder) for class_folder in ("pos", "neg")
)

In [None]:
def load_data(filepath, label, encoding="utf-8"):
    """Read every ``.txt`` file in a directory into a list of labelled records.

    Parameters
    ----------
    filepath : str
        Directory to scan (not recursive). Only entries whose name ends in
        ``.txt`` are read.
    label : Any
        Value stored under the ``"label"`` key of every returned record.
    encoding : str, optional
        Text encoding used to decode the files. Defaults to UTF-8 so the result
        does not depend on the platform's locale encoding.

    Returns
    -------
    list of dict
        One ``{"text": <file contents>, "label": label}`` dict per file, ordered
        by file name.

    Raises
    ------
    OSError
        If the directory cannot be listed or a file cannot be opened.
    UnicodeDecodeError
        If a file's bytes are not valid in ``encoding``.
    """
    # listdir() returns names in arbitrary order; sort them so that repeated
    # runs produce the records in the same order.
    filenames = sorted(name for name in listdir(filepath) if name.endswith(".txt"))

    records = []
    for filename in filenames:
        with open(join(filepath, filename), encoding=encoding) as f:
            records.append({"text": f.read(), "label": label})

    return records

In [None]:
import pandas as pd
from sklearn.utils import shuffle


pos = load_data(pos_data_dir, 1)  # records from the "pos" folder get label 1
neg = load_data(neg_data_dir, 0)  # records from the "neg" folder get label 0

# Mix the two classes together. The shuffle is seeded so the same permutation is
# applied on every run; otherwise the seeded train/test split further down
# would still yield different rows each time the notebook is executed.
records_df = shuffle(pd.DataFrame(pos + neg), random_state=42).reset_index(drop=True)

In [None]:
# Preview the first few rows instead of rendering the whole frame.
records_df.head()

## Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Features are the "text" column, targets are the "label" column.
X, y = records_df["text"], records_df["label"]

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
# for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### Data Vectorisation


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF text vectoriser configured with scikit-learn's built-in English stop-word list.
vectoriser = TfidfVectorizer(stop_words="english")

In [None]:
# Fit the vectoriser on the training texts only and vectorise them in one step.
# Note the case: upper-case X_train is the raw text, lower-case x_train is the
# vectorised feature matrix fed to the model.
x_train = vectoriser.fit_transform(X_train)

### Model Training

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression classifier with scikit-learn's default hyperparameters.
model = LogisticRegression()

# Train on the vectorised training texts; as the last expression of the cell,
# the fitted estimator returned by fit() is also displayed.
model.fit(x_train, y_train)

In [None]:
y_train_pred = model.predict(x_train)

# Printing every training label yields two long lists that cannot be compared by
# eye, so report the fraction of matching labels and show only a short preview.
# to_numpy() makes the comparison positional, independent of the Series index.
train_accuracy = (y_train_pred == y_train.to_numpy()).mean()
print("Training accuracy: {:.3f}".format(train_accuracy))

preview_size = 10
print(list(y_train_pred[:preview_size]))
print(list(y_train.iloc[:preview_size]))

### Model Testing

In [None]:
# transform() (not fit_transform) re-uses what the vectoriser learnt from the
# training texts, so no information from the test split leaks into the features.
x_test = vectoriser.transform(X_test)
y_test_pred = model.predict(x_test)

# Score the held-out predictions so this section actually reports a result.
# to_numpy() makes the comparison positional, independent of the Series index.
test_accuracy = (y_test_pred == y_test.to_numpy()).mean()
print("Test accuracy: {:.3f}".format(test_accuracy))

## Example Prediction

In [None]:
# Pick the second held-out review by position. .iloc reads the single element
# directly instead of copying the whole split into a list first.
example_idx = 1
example_text = X_test.iloc[example_idx]
example_label = y_test.iloc[example_idx]

print("Text: {}\n Actual Label: {}".format(example_text, example_label))

### Predicting on the Example with Our Proprietary Model

In [None]:
# Wrap the single review in a list: transform() takes an iterable of documents,
# not a bare string.
x_vect = vectoriser.transform([example_text])
# predict() returns one label per input row; there is exactly one row here.
y_pred = model.predict(x_vect)
print("Predicted Label: {}".format(y_pred[0]))