In [12]:
import pandas as pd

df = pd.read_csv('~/Downloads/DEMO.csv', index_col=None, header=0)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['release_script'] = pd.to_datetime(df['release_script'])
bool(df.loc[0, 'timestamp'] < df.loc[0, 'release_script'])

True

In [2]:
df['date'] = [d.date() for d in df['timestamp']]
df['time'] = [d.time() for d in df['timestamp']]
df['price'] = df['close']

In [11]:
import numpy as np
def rolling_volatility(df, intvl=5):
    '''
    
    :param df: stock data frame, columns must have ['date', 'time', 'price'] 
    :param int: time interval, unit in minute
    :return: return a new data frame with its volatility
    '''
    
    data = df.copy()
    output = pd.DataFrame()
    
    for date in set(df.loc[:, 'date']):
        temp_df1 = data.loc[data.date == date, :]
        temp_df1['ind'] = temp_df1.index

        temp_df1 = temp_df1.groupby(temp_df1.index//intvl).mean()
        temp_df2 = temp_df1.set_index('ind')
        temp_df2.loc[:, 'time'] = df.loc[temp_df2.index, 'time']

        s_i = temp_df2['price']
        s_i_1 = temp_df2['price'].shift(1)
        temp_df2['u_sequence'] = np.log(s_i/s_i_1)
        s = temp_df2['u_sequence'].rolling(window=len(set(temp_df1.index//intvl)), center=False).std()
        #data.loc[data.date == date, str(intvl)+'_min_vol'] 
        temp_df2.loc[:, str(intvl)+'_min_vol'] = s * np.sqrt(len(set(temp_df1.index//intvl)))
        
        output = pd.concat([output,temp_df2])
    output.reset_index(drop=True,inplace=True)
    return output

def volatility(df, intvl=5):
    
    data = df.copy()
    avg_df = pd.DataFrame()
    output = pd.DataFrame(columns=['vol_before', 'vol_after'])
    
    for date in set(df.loc[:, 'date']):
        temp_df1 = data.loc[data.date == date, :]
        temp_df1['ind'] = temp_df1.index
        temp_df1 = temp_df1.groupby(temp_df1.index//intvl).mean()
        temp_df2 = temp_df1.set_index('ind')
        temp_df2.loc[:, 'timestamp'] = df.loc[temp_df2.index, 'timestamp']
        temp_df2.loc[:, 'release_script'] = df.loc[temp_df2.index, 'release_script']
        
        avg_df = pd.concat([avg_df,temp_df2])
    
    for date in set(avg_df.loc[:, 'release_script']):
        temp_df3 = avg_df.loc[avg_df.release_script == date,:]
        vol_before = temp_df3.loc[temp_df3.timestamp < temp_df3.release_script, 'price'].std()
        vol_after = temp_df3.loc[temp_df3.timestamp > temp_df3.release_script, 'price'].std()
        
        output.loc[date, :] = [vol_before, vol_after]
        output.index.name = 'date'
        output.sort_index(inplace=True)
    
    return output


def vol(df, mode):
    '''
    
    :param df: before or after stock data frame
    :param mode: 1 - before
                 0 - after
    :return: 
    '''
    
    intvl_list = [5,10,30,60,120]
    dic = {}
    
    for i in intvl_list:
        if mode:
            try:
                dic[str(i)+'_before'] = (df.iloc[-i, 7] - df.iloc[-1, 7]) / df.iloc[-1, 7]
            except:
                pass
    
        else: 
            try:
                dic[str(i)+'_after'] = (df.iloc[i, 7].max() - df.iloc[0 , 7]) / df.iloc[0, 7]
            except:
                pass
    
    return dic


def auto_vol(df):
    
    data = df.copy()
    output = pd.DataFrame(columns=['10_after', '5_after', '120_before', '30_after', 
                                   '5_before', '60_before', '60_after', '10_before', '30_before', '120_after'])
        
    for date in set(data.loc[:, 'release_script']):
        
        temp_df1 = data.loc[data.release_script == date, :]
        
        df_before = temp_df1.loc[temp_df1.timestamp < temp_df1.release_script, :]
        
        df_after = temp_df1.loc[temp_df1.timestamp > temp_df1.release_script, :]
        
        dic ={**vol(df_before,1), **vol(df_after, 0)}
        
        for key in dic.keys():
            output.loc[date, key] = dic[key]
    
    return output
        

auto_vol(df)

Unnamed: 0,10_after,5_after,120_before,30_after,5_before,60_before,60_after,10_before,30_before,120_after
2012-12-24 10:30:00,0.00372317,0.00131406,,-0.00087604,-0.00218914,0.00175131,-0.00043802,-0.00394046,-0.0131349,
2015-10-28 10:00:00,,,0.00654837,,0.000211238,0.00211238,,-0.000633714,-0.00190114,
2016-04-28 10:00:00,,,0.000373902,,-0.000934754,0.00169564,,0.00149561,0.00186951,
2013-07-31 10:00:00,0.0181143,0.013934,,0.0404087,-0.00348594,0.0239368,0.0427311,-0.00766907,-0.0118522,
2014-02-05 10:00:00,,,-0.0115348,,-0.00448574,-0.0144185,,-0.00544697,-0.00448574,
2016-07-27 10:00:00,,,-0.00196569,,0.000714796,0.00187634,,0.000536097,0.000893495,
2012-07-25 09:00:00,-0.042042,-0.02002,0.0123579,-0.0865866,0.0103806,0.0081562,-0.0980981,0.00617894,0.00395452,
2011-11-02 10:00:00,-0.00489331,-0.00930531,,-0.00128349,0.00160578,0.018065,-0.00770095,0.00642312,0.0160578,
2014-04-30 10:00:00,,,-0.00782937,,0.000809935,0.00134989,,0.00215983,0.00404968,
2013-04-25 10:00:00,-0.00487329,0.000487329,-0.00195886,-0.00438596,-0.00391773,-0.00391773,0.00146199,0.0,0.0186092,
