# NLP Sentiment Analysis Experimentation notebook

##### Jupyter helpers:

In [None]:
%reload_ext autoreload
%autoreload

Define imports

In [None]:

from src.data import DataLoader
from src.models import SentimentClassifier
from src.data_processing import DataProcessor
from src.data_processing.text import SpacyTextProcessor
from src.experimentation import MlflowExperimentation
from src.evaluation import Evaluator, EvaluationMetrics


## Load data
*Replace `NLPSampleDataLoader` with your own `DataLoader` implementation*

In [None]:
from src.data import NLPSampleDataLoader

# Download the "imdb" sample dataset and obtain its train/test splits.
# NOTE(review): the second positional argument (1.0) is presumably a sampling
# fraction -- confirm against NLPSampleDataLoader.
data_loader = NLPSampleDataLoader("imdb", 1.0)
data_loader.download_dataset()
imdb_df_train, imdb_df_test = data_loader.get_dataset()

# Inputs are read from the 'text' field, targets from the 'label' field.
X_train = imdb_df_train['text']
y_train = imdb_df_train['label']
X_test = imdb_df_test['text']
y_test = imdb_df_test['label']

Define the experimentation object, which will be used for logging the experiment's parameters, metrics and artifacts
*Replace MlflowExperimentation if you use a different experimentation system*

In [None]:
# Experiment logger constructed with no arguments; it is handed to
# ExperimentRunner through the `experiment_logger` argument in the
# "Run experiment" cell.
experimentation = MlflowExperimentation()

Create preprocessor for handling data preprocessing, feature engineering etc.

In [None]:
# NOTE: SpacyTextProcessor is also imported in the "Define imports" cell;
# this second import is redundant but harmless.
from src.data_processing.text import SpacyTextProcessor

# Text preprocessor instance; injected into SentimentClassifier in the
# "Create model/logic" cell.
preprocessor = SpacyTextProcessor()

Create model/logic:

In [None]:
# Build the classifier, passing the preprocessor in as a keyword argument.
my_model = SentimentClassifier(preprocessor=preprocessor)

Define evaluation

In [None]:
class MyEvaluator(Evaluator):
    """Placeholder evaluator for this notebook.

    TODO: implement ``evaluate`` so that it returns an ``EvaluationMetrics``
    instance, as its annotation declares; as written it computes nothing.
    """

    def evaluate(self, **kwargs) -> EvaluationMetrics:
        """Accept arbitrary keyword arguments and return None (stub)."""
        return None


evaluator = MyEvaluator()

Run experiment

In [None]:
from src import ExperimentRunner

# Wire the model, the train/test splits, the data loader, the experiment
# logger and the evaluator into a single runner.
experiment_runner = ExperimentRunner(
    model=my_model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    data_loader=data_loader,
    log_experiment=True,
    experiment_logger=experimentation,
    # NOTE: MyEvaluator.evaluate is still a stub that returns None.
    evaluator=evaluator,
    experiment_name="Experiment",
)

# Execute the experiment and display whatever run() returns.
results = experiment_runner.run()
print(results)
