In [None]:
# Prints a fixed string; none of the visible cells below depend on it.
print('Gajraj')

In [None]:
%%writefile fdf.py

"""
2021.07.16

For encoders that already fine-tune on the targets (namely text),
the unit mixer just arg-maxes the output of the encoder.
"""

from typing import List

import torch
import pandas as pd

from lightwood.helpers.log import log
from lightwood.mixer.base import BaseMixer
from lightwood.encoder.base import BaseEncoder
from lightwood.data.encoded_ds import EncodedDs
from lightwood.api.types import PredictionArguments


class FetchDB(BaseMixer):
    """
    Mixer that performs no training of its own.

    ``fit`` only logs a message and ``partial_fit`` is a no-op. Predictions are
    produced by handing each item of the dataset to ``target_encoder.decode``.
    """

    def __init__(self, stop_after: float, target_encoder: BaseEncoder):
        """
        :param stop_after: passed through unchanged to ``BaseMixer.__init__``.
        :param target_encoder: encoder whose ``decode`` method is called at prediction time.
        """
        super().__init__(stop_after)
        self.stable = True
        self.supports_proba = False
        self.target_encoder = target_encoder

    def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None:
        """Log an informational message; neither dataset is read."""
        log.info("Unit Mixer just borrows from encoder")

    def partial_fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None:
        """Do nothing; both arguments are ignored."""
        return None

    def __call__(self, ds: EncodedDs,
                 args: PredictionArguments = PredictionArguments()) -> pd.DataFrame:
        """
        Decode every item of ``ds`` with the target encoder.

        Iterating ``ds`` must yield pairs; only the first element of each pair is
        used. It is given an extra leading dimension with ``torch.unsqueeze(..., 0)``
        before being passed to ``target_encoder.decode``, and everything that
        ``decode`` returns is appended to the output in order.

        :param ds: dataset to iterate over.
        :param args: only ``predict_proba`` is read; when it is truthy a warning is
                     logged and predictions are still produced as usual.
        :returns: a DataFrame with a single column named "prediction".
        """
        if args.predict_proba:
            # @TODO: depending on the target encoder, this might be enabled
            log.warning('This model does not output probability estimates')

        collected: List[object] = [
            decoded_item
            for encoded_row, _ in ds
            for decoded_item in self.target_encoder.decode(torch.unsqueeze(encoded_row, 0))
        ]

        return pd.DataFrame({"prediction": collected})


In [None]:
import pandas as pd
# Read the airline CSV, keep only the sentiment and text columns, rename
# 'airline_sentiment' to 'sentiment', and keep the first 100 rows.
df = (
    pd.read_csv('/workspace/PythonExp/lightwoodexp/airline.csv')[['airline_sentiment', 'text']]
    .rename(columns={"airline_sentiment": "sentiment"})
    .head(100)
)

In [None]:
# Write `df` to airline.csv, resolved relative to the current working directory.
# NOTE(review): `index` is left at its pandas default, so the row index is also
# written as the first column — confirm that is intended.
df.to_csv("airline.csv")

In [None]:
from lightwood.api.high_level import ProblemDefinition, json_ai_from_problem, load_custom_module
import pandas as pd

# Load the custom module that the %%writefile cell saved as fdf.py.
load_custom_module('./fdf.py')

# `df` is the frame prepared in the earlier pandas cell; it is not re-read here.

# Predictive task: 'sentiment' is the column to predict.
pdef = ProblemDefinition.from_dict({'target': 'sentiment'})

# Build the Json AI intermediate representation from the data and the problem
# definition, then print it (it can also be saved to a file and edited there).
json_ai = json_ai_from_problem(df, problem_definition=pdef)
print(json_ai.to_json())

In [None]:
# Set the model's submodels to a single entry pointing at the FetchDB class from fdf.py.
# The "args" keys match the parameter names of FetchDB.__init__ (target_encoder, stop_after).
# NOTE(review): the "$..." strings look like placeholders that lightwood resolves when it
# generates code from the Json AI — confirm against the Json AI documentation.
json_ai.model['args']['submodels'] = [{
    'module': 'fdf.FetchDB',
    "args": {
        "target_encoder": "$encoders[self.target]",
        "stop_after": "$problem_definition.seconds_per_mixer"
    }
}]

In [None]:
# Print the Json AI again so the submodels assignment can be inspected.
print(json_ai.to_json())

In [None]:
from lightwood.api.high_level import code_from_json_ai, predictor_from_code
# Turn the Json AI into predictor source code, build a predictor object from that
# code, and call its learn() method on `df`.
code = code_from_json_ai(json_ai)
predictor = predictor_from_code(code)
predictor.learn(df)

In [None]:
# Run the predictor on three ad-hoc 'text' values and print what it returns.
predictions = predictor.predict(pd.DataFrame({
    'text': ['you are beautyful','are you mad?','Where are you bloodyful']
}))
print(predictions)