In [None]:
# Smoke-test cell: prints a fixed string and does nothing else.
print('Gajraj')

In [None]:
# !pip install mindsdb loguru ttictoc
# !pip install git+https://github.com/neuml/txtai

### Lightwood ops

In [None]:
%%writefile fdf.py

"""
2021.07.16

For encoders that already fine-tune on the targets (namely text)
the unity mixer just arg-maxes the output of the encoder.
"""

from typing import List

import torch
import pandas as pd

from lightwood.helpers.log import log
from lightwood.mixer.base import BaseMixer
from lightwood.encoder.base import BaseEncoder
from lightwood.data.encoded_ds import EncodedDs
from lightwood.api.types import PredictionArguments

import asyncio
from ttictoc import tic,toc
from txtai.pipeline import Labels


class FetchDB(BaseMixer):
    """Pass-through mixer: predictions are the target encoder's decoded output.

    No model is trained here. Each encoded row of the dataset is handed to
    ``target_encoder.decode`` and the decoded values are collected into a
    single-column ``prediction`` DataFrame.
    """

    def __init__(self, stop_after: float, target_encoder: BaseEncoder):
        """
        :param stop_after: forwarded unchanged to ``BaseMixer.__init__``.
        :param target_encoder: encoder whose ``decode`` produces the predictions.
        """
        super().__init__(stop_after)
        self.target_encoder = target_encoder
        self.supports_proba = False
        self.stable = True
        # txtai labelling pipeline; not used by any method of this class yet.
        self.labels = Labels()

    def fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None:
        """Nothing to learn; only logs that the encoder does the work."""
        log.info("Unit Mixer just borrows from encoder")

    def partial_fit(self, train_data: EncodedDs, dev_data: EncodedDs) -> None:
        """No-op: there is no state to update."""
        pass

    def __call__(self, ds: EncodedDs,
                 args: PredictionArguments = PredictionArguments()) -> pd.DataFrame:
        """Decode every row of ``ds`` with the target encoder.

        :param ds: dataset iterated as ``(X, _)`` pairs; only ``X`` is used.
        :param args: prediction arguments; ``predict_proba`` only triggers a
            warning because this mixer has no probability output.
        :returns: DataFrame with one ``prediction`` column.
        """
        if args.predict_proba:
            # @TODO: depending on the target encoder, this might be enabled
            log.warning('This model does not output probability estimates')

        # Start the timer here so the measured interval is this call only and
        # toc() always has a matching tic(), even if fit() never ran in this
        # process.
        tic()

        decoded_predictions: List[object] = []
        # ConcatedEncodedDs([train_data, dev_data]).get_column_original_data("reviewtext")
        # tags = ["Baseball", "Football", "Hockey", "Basketball"]
        for X, _ in ds:
            # decode() is given the row with an added leading dimension.
            decoded_prediction = self.target_encoder.decode(torch.unsqueeze(X, 0))
            decoded_predictions.extend(decoded_prediction)

        ydf = pd.DataFrame({"prediction": decoded_predictions})

        # The original printed an undefined name (`gdb`) here, which raised
        # NameError on every prediction; log only the elapsed time instead.
        timeinfo = toc()
        log.info(f'FetchDB prediction time --{timeinfo}')
        return ydf


In [None]:
import pandas as pd
# Read the airline dataset and keep only its first 100 rows.
# Optional column selection / renaming kept for reference:
# df=df[['airline_sentiment','text']]
# df=df.rename(columns={"airline_sentiment":"sentiment"})
df = pd.read_csv('/workspace/PythonExp/lightwoodexp/airline.csv').head(100)
# Last expression of the cell, so the notebook displays the column index.
df.columns

In [None]:
# Write the working frame to the current directory. index=False keeps the
# row index out of the file; otherwise re-reading it with read_csv produces
# a spurious "Unnamed: 0" column.
df.to_csv("airline.csv", index=False)

In [None]:
from lightwood.api.high_level import ProblemDefinition, json_ai_from_problem, load_custom_module
import pandas as pd

# Load the custom module written to ./fdf.py.
load_custom_module('./fdf.py')

# Load the dataset.
df = pd.read_csv('/workspace/PythonExp/lightwoodexp/airline.csv')

# Predictive task: 'sentiment' is the column to predict.
pdef = ProblemDefinition.from_dict({'target': 'sentiment'})

# Build the JSON-AI intermediate representation from the data and the problem definition.
json_ai = json_ai_from_problem(df, problem_definition=pdef)

# Dump it for inspection (it can also be saved to a file and edited there).
print(json_ai.to_json())

In [None]:
# Distinct values of the target column.
df["sentiment"].unique()

In [None]:
# Overwrite the submodel list so that the custom FetchDB mixer is the only entry.
json_ai.model['args']['submodels'] = [
    {
        "module": "fdf.FetchDB",
        "args": {
            "target_encoder": "$encoders[self.target]",
            "stop_after": "$problem_definition.seconds_per_mixer",
        },
    },
]

In [None]:
# Print the JSON-AI again to check the edited submodel list.
print(json_ai.to_json())

In [None]:
from lightwood.api.high_level import code_from_json_ai, predictor_from_code
# Turn the JSON-AI into predictor source code, build a predictor object
# from that code, then train it on the DataFrame.
code = code_from_json_ai(json_ai)
predictor = predictor_from_code(code)
predictor.learn(df)

In [None]:
# Run the trained predictor on three hand-written 'text' samples and show the result.
predictions = predictor.predict(pd.DataFrame({
    'text': ['you are beautyful','are you mad?','Where are you bloodyful']
}))
print(predictions)

### AsyncOps

In [None]:
%%writefile asyncexp.py
import asyncio
from ttictoc import tic,toc
# Module-level placeholder; manager() binds its own local `result`, so this
# global is never reassigned in the visible code.
result=None
# Start the timer; the matching toc() runs after asyncio.run() at the bottom.
tic()
async def foo(msg):
    """Print a greeting, pause briefly, then print and return ``msg``."""
    print('Hello')
    # Awaiting the sleep suspends this coroutine for about 10 ms.
    await asyncio.sleep(0.01)
    print(msg)
    return msg
    
async def manager():
    """Schedule 400 ``foo`` tasks, wait for all of them, and return their results."""
    # One task per integer 0..399, each passed to foo() as a string.
    pending = [asyncio.create_task(foo(str(idx))) for idx in range(400)]

    gathered = asyncio.gather(*pending)
    print('Result::')
    outcome = await gathered
    print('Done')
    return outcome
    


# Run manager() to completion on a fresh event loop and keep its return value.
gdb=asyncio.run(manager())
# Elapsed time since the module-level tic().
timeinfo=toc()
print(f'{gdb} --{timeinfo}')

In [None]:
!python asyncexp.py

In [None]:
%%writefile asyncexp.py

import asyncio
from ttictoc import tic,toc
from txtai.pipeline import Labels

# txtai labelling pipeline shared by every foo() call below.
labels = Labels()
# Module-level placeholder; foo() and manager() bind their own local `result`.
result=None


# Start the timer after the pipeline has been constructed.
tic()

async def foo(text, tags):
    """Print ``text``, pick one entry of ``tags`` for it via ``labels``, print and return it."""
    print(text)
    await asyncio.sleep(0.001)
    # The first element of the first entry returned by labels() is used as an
    # index into `tags` (presumably the best-scoring label id -- confirm
    # against the txtai Labels documentation).
    chosen_index = labels(text, tags)[0][0]
    result = tags[chosen_index]
    print(result)
    return result
    
async def manager():
    """Label a fixed list of sports snippets concurrently and return the chosen tags."""
    headlines = [
        "Dodgers lose again, give up 3 HRs in a loss to the Giants",
        "Giants 5 Cardinals 4 final in extra innings",
        "Dodgers drop Game 2 against the Giants, 5-4",
        "Flyers 4 Lightning 1 final. 45 saves for the Lightning.",
        "Slashing, penalty, 2 minute power play coming up",
        "What a stick save!",
        "Leads the NFL in sacks with 9.5",
        "UCF 38 Temple 13",
        "With the 30 yard completion, down to the 10 yard line",
        "Drains the 3pt shot!!, 0:15 remaining in the game",
        "Intercepted! Drives down the court and shoots for the win",
        "Massive dunk!!! they are now up by 15 with 2 minutes to go",
    ]
    tags = ["Baseball", "Football", "Hockey", "Basketball"]

    # One task per snippet; every task receives the same tag list.
    pending = [asyncio.create_task(foo(str(line), tags)) for line in headlines]

    gathered = asyncio.gather(*pending)
    print('Result::')
    outcome = await gathered
    print('Done')
    return outcome
    


# Run manager() to completion on a fresh event loop and keep its return value.
gdb=asyncio.run(manager())
# Elapsed time since the module-level tic().
timeinfo=toc()
print(f'{gdb} --{timeinfo}')

In [None]:
from joblib import Parallel, delayed
from math import sqrt
from ttictoc import tic,toc
from txtai.pipeline import Labels

# txtai labelling pipeline used by foo() below.
labels = Labels()

# Text snippets to label; note that `data` is rebound to the results list
# once the Parallel call at the end of this cell has run.
data = ["Dodgers lose again, give up 3 HRs in a loss to the Giants",
        "Giants 5 Cardinals 4 final in extra innings",
        "Dodgers drop Game 2 against the Giants, 5-4",
        "Flyers 4 Lightning 1 final. 45 saves for the Lightning.",
        "Slashing, penalty, 2 minute power play coming up",
        "What a stick save!",
        "Leads the NFL in sacks with 9.5",
        "UCF 38 Temple 13",
        "With the 30 yard completion, down to the 10 yard line",
        "Drains the 3pt shot!!, 0:15 remaining in the game",
        "Intercepted! Drives down the court and shoots for the win",
        "Massive dunk!!! they are now up by 15 with 2 minutes to go"]

# Candidate tags passed to labels() for every snippet.
tags = ["Baseball", "Football", "Hockey", "Basketball"]


def foo(text, tags):
    """Print ``text``, pick one entry of ``tags`` for it via ``labels``, print and return it."""
    print(text)
    # The first element of the first entry returned by labels() is used as an
    # index into `tags`.
    chosen_index = labels(text, tags)[0][0]
    result = tags[chosen_index]
    print(result)
    return result

# Time the parallel labelling run.
tic()
# data=Parallel(n_jobs=10)(delayed(sqrt)(i**2) for i in range(1000))
# Run foo over every snippet with 10 joblib jobs; `data` is rebound from the
# input snippets to the list of results.
data=Parallel(n_jobs=10)(delayed(foo)(i,tags) for i in data)
timeinfo=toc()
print(f'timeinfo --{timeinfo} --{data}')

In [None]:
    # async def _foo(text,tags):
    #     print(text)
    #     await asyncio.sleep(.001)
    #     result=tags[labels(text, tags)[0][0]]
    #     print(result)
    #     return result


    # async def _manager():
    #     tasklist=[]
        
    #     data = ["Dodgers lose again, give up 3 HRs in a loss to the Giants",
    #             "Giants 5 Cardinals 4 final in extra innings",
    #             "Dodgers drop Game 2 against the Giants, 5-4",
    #             "Flyers 4 Lightning 1 final. 45 saves for the Lightning.",
    #             "Slashing, penalty, 2 minute power play coming up",
    #             "What a stick save!",
    #             "Leads the NFL in sacks with 9.5",
    #             "UCF 38 Temple 13",
    #             "With the 30 yard completion, down to the 10 yard line",
    #             "Drains the 3pt shot!!, 0:15 remaining in the game",
    #             "Intercepted! Drives down the court and shoots for the win",
    #             "Massive dunk!!! they are now up by 15 with 2 minutes to go"]

    #     tags = ["Baseball", "Football", "Hockey", "Basketball"]
        
        
    #     for i in data:
    #         task=asyncio.create_task(foo(str(i),tags))
    #         tasklist.append(task)
        
    #     data=asyncio.gather(*tasklist)
    #     print('Result::')
    #     result=await data
    #     print('Done')
    #     return result