# Compute Volatility vol20 and vol40 using Python columns

In [1]:
from kywy.client.kawa_client import KawaClient as K

# Build the KAWA client from settings held in the environment; the cell
# output reports the server URL and workspace it authenticated against.
kawa = K.load_client_from_environment()
# Shortcut to `kawa.commands` (not used by the cells visible in this notebook).
cmd = kawa.commands

Authentication successful on https://demo.kawa.ai:8080, in workspace 2


## 1. Dataset generation

Let's generate a PnL timeseries with the following dimensions:
- portfolio
- stock

In [2]:
import datetime
import numpy as np
import pandas as pd

# Fixed seed so that "Restart Kernel -> Run All" regenerates the exact same
# dataset instead of a new random one on every run.
SEED = 42
rng = np.random.default_rng(SEED)

data = []

start_date = datetime.date(2014,1,1)
day_count = 10 * 365

# Calendar shared by every stock: `day_count` consecutive days from
# `start_date`. Built once here rather than once per stock inside the loop.
dates = [start_date + datetime.timedelta(days=offset) for offset in range(day_count)]

# Define the different stocks in fictitious portfolios
stocks = {
    'US_STOCKS': ['AAPL','TSLA','MSFT','NVDA','INTC','GOOG','AMZN'],
    'EU_STOCKS': ['BP','HSBC','ING', 'LVMH'],
}

# Define the normal distribution (mu, sigma) of the PnL for each stock,
# used to generate random data
mu_sigma_per_stock = {
    'AAPL': (200,5.1),
    'TSLA': (170,8.1),
    'MSFT': (432,0.3),
    'NVDA': (120,10.2),
    'INTC': (30,3.5),
    'GOOG': (178,1.5),
    'AMZN': (187,0.5),
    'BP': (35,0.5),
    'HSBC': (43,0.5),
    'ING': (17,0.5),
    'LVMH': (158,0.5),
}

# Generate one record per (portfolio, stock, date) with a randomly drawn PnL
for portfolio, stock_list in stocks.items():
    for stock in stock_list:
        mu, sigma = mu_sigma_per_stock[stock]
        pnl_list = rng.normal(mu, sigma, day_count)
        # Pair each date with its drawn PnL; no manual index bookkeeping needed.
        data.extend(
            {
                'portfolio': portfolio,
                'stock': stock,
                'date': date,
                'pnl': pnl,
            }
            for date, pnl in zip(dates, pnl_list)
        )

# Build the frame once from the accumulated records
df = pd.DataFrame(data)
df

Unnamed: 0,portfolio,stock,date,pnl
0,US_STOCKS,AAPL,2014-01-01,194.278174
1,US_STOCKS,AAPL,2014-01-02,191.622930
2,US_STOCKS,AAPL,2014-01-03,186.563538
3,US_STOCKS,AAPL,2014-01-04,201.000716
4,US_STOCKS,AAPL,2014-01-05,207.163552
...,...,...,...,...
40145,EU_STOCKS,LVMH,2023-12-25,157.511357
40146,EU_STOCKS,LVMH,2023-12-26,157.634141
40147,EU_STOCKS,LVMH,2023-12-27,158.508932
40148,EU_STOCKS,LVMH,2023-12-28,158.208550


## 2. Ingest the generated data into KAWA

In [3]:
# Create a loader for the generated frame and register its datasource
# under the name 'PNL data'.
loader = kawa.new_data_loader(df=df, datasource_name='PNL data')
loader.create_datasource()

# Ingestion settings, gathered in one place so they are easy to tweak.
load_options = {
    'reset_before_insert': True,
    'create_sheet': True,
    'nb_threads': 2,
}

# The trailing semicolon keeps the call's return value out of the cell output.
loader.load_data(**load_options);


Starting an ingestion session with id=483f7a78-7068-4eed-ba02-4da1ce13f956
> Exporting the dataframe into 2 parquet files
> Starting 2 loading threads
> Streaming file /var/folders/rl/6bqlws416nz6z2298zxq22zc0000gn/T/5b16b39f-dc12-42f6-b3c9-1a2b0c9cbf77/__partition__=1/2dab9b63d798425286fee6555f9b62cf-0.parquet to KAWA
> Streaming file /var/folders/rl/6bqlws416nz6z2298zxq22zc0000gn/T/5b16b39f-dc12-42f6-b3c9-1a2b0c9cbf77/__partition__=0/2dab9b63d798425286fee6555f9b62cf-0.parquet to KAWA
> 40150 rows were imported in 0.606295108795166ms
> Import was successfully finalized
Sheet PNL data was created: https://demo.kawa.ai:8080/workspaces/2/sheets/56/views/835


## 3. Create a Python column in KAWA

#### 3.a Create the Python script

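Open the script section of your KAWA instance and create the following script. For each `key`, it sorts the rows by `date` and computes the rolling standard deviation of `pnl` over the last 20 rows (`vol20`) and the last 40 rows (`vol40`):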


```python
import logging
import pandas as pd
import datetime
from kywy.client.kawa_decorators import outputs, inputs

# Module-level logger that `execute` uses to report its progress.
logger = logging.getLogger('script-logger')

@inputs(key=str, date=datetime.date, pnl=float)
@outputs(vol20=float, vol40=float)
def execute(df: pd.DataFrame):
    """Compute 20- and 40-row rolling standard deviations of the PnL per key.

    Args:
        df: Frame with the columns `key`, `date` and `pnl`.

    Returns:
        A frame with the columns `date`, `key`, `vol20` and `vol40`, keeping
        the index labels of `df`. Rows are grouped by key (in the order
        yielded by `groupby`) and sorted by `date` within each key. The first
        19 `vol20` values and the first 39 `vol40` values of each key are NaN,
        because their rolling window is not yet full. An input with no rows
        yields an empty frame with the same four columns.
    """
    output_columns = ['date', 'key', 'vol20', 'vol40']

    logger.info('Starting the vol computation')

    # Collect one frame per key and concatenate once at the end. Growing a
    # frame with pd.concat inside the loop copies all previous rows on every
    # iteration, and seeding it with an empty object-dtype frame triggers
    # pandas' empty-entry concat deprecation (result dtypes may become object).
    per_key_frames = []
    for key, group in df.groupby('key'):

        logger.info(f'Computing standard deviations for key: {key}')

        # The rolling windows must follow chronological order
        group = group.sort_values('date')
        pnl = group['pnl']

        per_key_frames.append(
            group[['date', 'key']].assign(
                vol20=pnl.rolling(window=20).std(),
                vol40=pnl.rolling(window=40).std(),
            )
        )

    # pd.concat raises on an empty list, so handle "no groups" explicitly
    if not per_key_frames:
        return pd.DataFrame(columns=output_columns)

    return pd.concat(per_key_frames)

```

#### 3.b Create a key column


From within the GUI, create a column that is the concatenation of the stock and the portfolio.
This will be the dimension of your PnL timeseries. Call it `timeseries_dimension`.


#### 3.c Connect the script to your sheet

Next, open your sheet and add the script to the control panel via "+ Controls" > "Button".
Bind the script inputs as follows: the newly created `timeseries_dimension` column to the `key` input, the timeseries time index (`date`) to the `date` input, and the PnL (`pnl`) to the `pnl` input.
Save and run your script.