# Scrape JSON-formatted Fund Details

In [None]:
import requests
import json
import re
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Net-value history endpoint, queried for fundCode=161725, first page, pageSize=9999,
# with startDate/endDate left blank.
urlForFundDetail = "http://api.fund.eastmoney.com/f10/lsjz?fundCode=161725&pageIndex=1&pageSize=9999&startDate=&endDate="
# Browser-like User-Agent plus a Referer header.
# NOTE(review): presumably the endpoint rejects requests without the Referer — confirm.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36', 'Referer': 'http://fundf10.eastmoney.com/'}
# A timeout stops "Run All" from hanging forever when the server never answers.
httpRequest = requests.get(urlForFundDetail, headers = headers, timeout = 30)
# Fail here, with a clear HTTP error, instead of later inside the JSON parsing cells.
httpRequest.raise_for_status()

In [None]:
# IPython introspection (`?` suffix): shows information about the response object.
# Development aid only; it does not change any state.
httpRequest?

In [None]:
# Decode the raw response bytes as UTF-8 text.
rawJsonString = httpRequest.content.decode('utf-8')
# IPython introspection (`?` suffix): shows information about the decoded string.
rawJsonString?

# Extract JSON Array of Time Series from JSON String

In [None]:
# Keep only the text from the first '{' to the last '}', dropping anything wrapped
# around the JSON object. re.DOTALL lets '.' match newlines too; without it a
# multi-line response would not match and would be passed through unchanged.
tidyJsonString = re.sub(r'^.*?([{].+[}]).*$', r'\1', rawJsonString, flags = re.DOTALL)

In [None]:
# IPython introspection (`?` suffix): shows information about the extracted JSON text.
tidyJsonString?

# Parse JSON String into pandas.DataFrame

In [None]:
# Parse the extracted JSON text into Python objects.
jsonObject = json.loads(tidyJsonString)
# IPython introspection (`?` suffix): shows information about the parsed object.
jsonObject?

In [None]:
# Sanity check: top-level keys, number of records in Data.LSJZList, and the TotalCount field.
# NOTE(review): presumably the two numbers should match when one page holds every record — confirm.
(jsonObject.keys(), len(jsonObject['Data']['LSJZList']), jsonObject['TotalCount'])

In [None]:
# One DataFrame row per record in Data.LSJZList.
df = pd.DataFrame(data = jsonObject['Data']['LSJZList'])
# Show the available column names as a set.
set(df.columns)

# Drop Unnecessary Columns & Fix the Index

In [None]:
# Only these four columns are kept; everything else is dropped.
columns_to_keep = {'FSRQ', 'DWJZ', 'LJJZ', 'JZZZL'}
columns_to_drop = [key for key in df.columns if key not in columns_to_keep]
df = df.drop(columns = columns_to_drop)

# Index the frame by the FSRQ values (the column itself is kept as well).
df.index = df['FSRQ']
# The values arrive as strings; anything that cannot be parsed becomes NaN.
for floatColumn in ('DWJZ', 'LJJZ', 'JZZZL'):
    df[floatColumn] = pd.to_numeric(df[floatColumn], errors = 'coerce')

df

# Fix the Missing Values in Column "JZZZL"

In [None]:
def woody_neighbor_rows(row_index, df, rowCount = 1):
    """Return up to ``rowCount`` consecutive rows of ``df`` starting at ``row_index``.

    Parameters
    ----------
    row_index : label present in ``df.index``; the first row of the window.
    df : pandas.DataFrame to slice.
    rowCount : maximum number of rows to return (default 1). Fewer rows come
        back when the window runs past the end of the frame.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows, in the frame's own row order.

    Raises
    ------
    KeyError
        If ``row_index`` is not in ``df.index``.
    """
    row_index_loc = df.index.get_loc(row_index)
    # Slice by position. Looking the labels up again through df.loc would return
    # extra rows whenever the window contains duplicated labels, and is slower.
    return df.iloc[row_index_loc:(row_index_loc + rowCount)].copy()

def woody_next_row(row_index, df):
    """Return the row that immediately follows ``row_index`` in ``df``.

    Parameters
    ----------
    row_index : label present in ``df.index``.
    df : pandas.DataFrame to look in.

    Returns
    -------
    pandas.Series or None
        The following row, or ``None`` when ``row_index`` is the last row.
    """
    # Fetch the two-row window once and reuse it.
    available_df = woody_neighbor_rows(row_index, df, 2)
    if available_df.index.size != 2:
        return None
    return available_df.iloc[1]

# Smoke tests for woody_neighbor_rows on hard-coded dates.
# NOTE(review): these raise KeyError if a date is missing from df.index — confirm
# the dates exist for the fund being loaded.
print(woody_neighbor_rows('2015-06-29', df, 5))
print(woody_neighbor_rows('2015-06-29', df, 1))
print(woody_neighbor_rows('2015-05-27', df, 5))

# Smoke test for woody_next_row. The result is printed because a bare expression
# in the middle of a cell is evaluated and then silently discarded.
print(woody_next_row('2015-05-29', df))

def fix_nan_in_jzzzl(row, df):
    """Recompute ``JZZZL`` for ``row`` from the row that follows it in ``df``.

    The value is ``(LJJZ - following LJJZ) / following DWJZ * 100``. When
    ``row`` has no following row it is compared with itself.

    NOTE(review): the following row is treated as the previous trading day,
    which assumes ``df`` is ordered newest-first — confirm.

    Parameters
    ----------
    row : pandas.Series whose ``name`` is its label in ``df.index``; modified in place.
    df : pandas.DataFrame containing the row and its neighbours.

    Returns
    -------
    pandas.Series
        The same ``row`` object with ``JZZZL`` overwritten.
    """
    current_day_data = row
    # Look the neighbour up once; each lookup slices the frame.
    previous_day_data = woody_next_row(row.name, df)
    if previous_day_data is None:
        previous_day_data = row
    row['JZZZL'] = (current_day_data['LJJZ'] - previous_day_data['LJJZ']) / previous_day_data['DWJZ'] * 100
    return row

# Recompute JZZZL for the rows where it is missing, then write the repaired
# rows back into df (DataFrame.update aligns on index and column labels).
nan_jzzzl_df = df.loc[df['JZZZL'].isna()].apply(fix_nan_in_jzzzl, axis = 1, args = (df,))
df.update(nan_jzzzl_df)
df

# Calculate Ideal DWJZ

In [None]:
# Seed the IDEAL_DWJZ column with 1.0 for every row.
df['IDEAL_DWJZ'] = 1.0
# Series handle for that column: make_ideal_dwjz() writes into it row by row,
# and df.update(ideal_dwjz_series) later in this cell applies it back to df.
ideal_dwjz_series = df['IDEAL_DWJZ']

def make_ideal_dwjz(row, df):
    """Carry the IDEAL_DWJZ value of ``row`` forward to the row that follows it.

    Reads this row's value from the module-level ``ideal_dwjz_series``, scales
    it by the following row's ``JZZZL`` percentage, and stores the result in
    ``ideal_dwjz_series`` under the following row's ``FSRQ``. Does nothing when
    ``row`` is the last row of ``df``.

    Parameters
    ----------
    row : pandas.Series with an ``FSRQ`` entry that is a label of ``df.index``.
    df : pandas.DataFrame giving the row order.

    Returns
    -------
    pandas.Series
        ``row``, unchanged.
    """
    today = row['FSRQ']
    following = woody_next_row(today, df)
    if following is None:
        return row

    following_day = following['FSRQ']
    growth_percent = following['JZZZL']
    ideal_dwjz_series.at[following_day] = ideal_dwjz_series[today] * (100.0 + growth_percent) / 100.0
    return row
# Walk the rows in ascending index order so each day's value is ready before
# the following day is derived from it.
reversed_df = df.sort_index().assign(IDEAL_DWJZ = 1.0)
reversed_df = reversed_df.apply(make_ideal_dwjz, axis = 1, args = (reversed_df,))

# Copy the values accumulated in ideal_dwjz_series back into df.
df.update(ideal_dwjz_series)
df

# Calculate Periodical Aggregative Metrics

In [None]:
# Pre-create the rolling max/min result columns, filled with 0.0.
for periodical_column in (
    'LJJZ_2W_MAX', 'LJJZ_2W_MIN',
    'LJJZ_4W_MAX', 'LJJZ_4W_MIN',
    'LJJZ_6W_MAX', 'LJJZ_6W_MIN',
    'IDEAL_DWJZ_4W_MAX', 'IDEAL_DWJZ_4W_MIN',
):
    df[periodical_column] = 0.0

def n_week_max(n_week, row_index, df):
    """Return the largest ``LJJZ`` in the ``n_week * 5`` rows starting at ``row_index``."""
    rows_in_period = n_week * 5
    return woody_neighbor_rows(row_index, df, rows_in_period)['LJJZ'].max()

def n_week_min(n_week, row_index, df):
    """Return the smallest ``LJJZ`` in the ``n_week * 5`` rows starting at ``row_index``."""
    rows_in_period = n_week * 5
    return woody_neighbor_rows(row_index, df, rows_in_period)['LJJZ'].min()

def make_min_max_of_periodical_ljjz(row, df):
    """Write the 2-, 4- and 6-week max/min of ``LJJZ`` for ``row`` into ``df``.

    Each window is the ``n_week * 5`` rows of ``df`` starting at this row
    (fewer near the end of the frame). The 4-week window is also used for the
    max/min of ``IDEAL_DWJZ``.

    Parameters
    ----------
    row : pandas.Series with an ``FSRQ`` entry that is a label of ``df.index``.
    df : pandas.DataFrame that is both read (``LJJZ``, ``IDEAL_DWJZ``) and
        written (``LJJZ_*W_MAX/MIN``, ``LJJZ_2W_INC/DEC``, ``IDEAL_DWJZ_4W_MAX/MIN``).

    Returns
    -------
    None
        Results are stored in ``df`` only.
    """
    # Use one key form for every read and write.
    row_index = str(row['FSRQ'])

    for n_week in (2, 4, 6):
        # Fetch each window once and reuse it for every statistic derived from it.
        n_week_df = woody_neighbor_rows(row_index, df, n_week * 5)
        ljjz_max = n_week_df['LJJZ'].max()
        ljjz_min = n_week_df['LJJZ'].min()
        df.at[row_index, 'LJJZ_{}W_MAX'.format(n_week)] = ljjz_max
        df.at[row_index, 'LJJZ_{}W_MIN'.format(n_week)] = ljjz_min

        if n_week == 2:
            # NOTE(review): INC/DEC receive exactly the same values as MAX/MIN, which
            # looks like a copy-paste leftover. Kept so the columns df ends up with
            # do not change — confirm what they were meant to hold.
            df.at[row_index, 'LJJZ_2W_INC'] = ljjz_max
            df.at[row_index, 'LJJZ_2W_DEC'] = ljjz_min

        if n_week == 4:
            df.at[row_index, 'IDEAL_DWJZ_4W_MAX'] = n_week_df['IDEAL_DWJZ'].max()
            df.at[row_index, 'IDEAL_DWJZ_4W_MIN'] = n_week_df['IDEAL_DWJZ'].min()

# Fill the rolling max/min columns row by row. The function writes into df
# itself, so the value returned by apply is not used.
df.apply(make_min_max_of_periodical_ljjz, axis = 1, args = (df,))

df

# Calculate Inc/Dec Speed in One Month

Calculation Strategy
* The period is 4 weeks (20 rows, counting 5 trading days per week).
* Find the maximum and minimum IDEAL_DWJZ within the period.
* Compare each day's IDEAL_DWJZ with that minimum/maximum to get the percentage increase/decrease over the period, referred to as the "inc/dec" amount.
* Count the days between each day and its corresponding minimum/maximum day, referred to as the days' interval.
* Calculate the "inc/dec" speed by dividing the "inc/dec" amount by the days' interval.

In [None]:
# Pre-create the result columns: float amounts, integer day counts, float speeds.
for amount_column in ('INC_AMOUNT_4W', 'DEC_AMOUNT_4W'):
    df[amount_column] = 0.0
for interval_column in ('INC_DAYS_INTERVAL_4W', 'DEC_DAYS_INTERVAL_4W'):
    df[interval_column] = 0
for speed_column in ('INC_4W_SPEED', 'DEC_4W_SPEED'):
    df[speed_column] = 0.0

# Narrow copy holding the date and the 4-week extremes, indexed like df.
ideal_dwjz_4w_min_and_max = df.loc[:, ['FSRQ', 'IDEAL_DWJZ_4W_MAX', 'IDEAL_DWJZ_4W_MIN']]

def woody_get_index_by_value(df, column, value):
    """Return the index labels of every row of ``df`` whose ``column`` equals ``value``."""
    matches = df[column] == value
    return df.loc[matches].index

def make_inc_dec_speed(row, df, window_rows = 4 * 5):
    """Write the 4-week inc/dec amount, days' interval and speed for ``row`` into ``df``.

    * amount: percentage difference between this row's ``IDEAL_DWJZ`` and the
      window minimum (inc) or maximum (dec) stored in ``IDEAL_DWJZ_4W_MIN/MAX``.
    * days' interval: 1 + the number of rows between this row and the first row
      inside the window holding that minimum/maximum.
    * speed: amount divided by days' interval.

    The extreme is located inside the window only. Searching the whole frame
    for an equal value returns a row outside the window whenever that value
    occurs more than once.

    Parameters
    ----------
    row : pandas.Series with ``FSRQ`` (a label of ``df.index``), ``IDEAL_DWJZ``,
        ``IDEAL_DWJZ_4W_MAX`` and ``IDEAL_DWJZ_4W_MIN``.
    df : pandas.DataFrame that is read (``IDEAL_DWJZ``) and written (result columns).
    window_rows : number of rows, starting at this row, that form the window.
        Defaults to 4 * 5 and must match the window used to fill ``IDEAL_DWJZ_4W_MAX/MIN``.

    Returns
    -------
    None
        Results are stored in ``df`` only. If an extreme cannot be found in the
        window (for example because it is NaN), that speed is set to NaN and
        that interval is left untouched.
    """
    row_index = str(row['FSRQ'])

    ideal_dwjz_4w_max = row['IDEAL_DWJZ_4W_MAX']
    ideal_dwjz_4w_min = row['IDEAL_DWJZ_4W_MIN']
    ideal_dwjz = row['IDEAL_DWJZ']

    inc_amount = (ideal_dwjz - ideal_dwjz_4w_min) / ideal_dwjz_4w_min * 100
    dec_amount = (ideal_dwjz - ideal_dwjz_4w_max) / ideal_dwjz_4w_max * 100

    df.at[row_index, 'INC_AMOUNT_4W'] = inc_amount
    df.at[row_index, 'DEC_AMOUNT_4W'] = dec_amount

    # Positional slice of the window that starts at this row.
    row_loc = df.index.get_loc(row_index)
    window = df['IDEAL_DWJZ'].iloc[row_loc:row_loc + window_rows]

    def days_to_first(value):
        # 1-based distance from this row to the first window row equal to `value`.
        hits = (window == value).to_numpy().nonzero()[0]
        return int(hits[0]) + 1 if len(hits) else None

    row_inc_days_interval = days_to_first(ideal_dwjz_4w_min)
    row_dec_days_interval = days_to_first(ideal_dwjz_4w_max)

    if row_inc_days_interval is None:
        df.at[row_index, 'INC_4W_SPEED'] = float('nan')
    else:
        df.at[row_index, 'INC_DAYS_INTERVAL_4W'] = row_inc_days_interval
        df.at[row_index, 'INC_4W_SPEED'] = inc_amount / row_inc_days_interval

    if row_dec_days_interval is None:
        df.at[row_index, 'DEC_4W_SPEED'] = float('nan')
    else:
        df.at[row_index, 'DEC_DAYS_INTERVAL_4W'] = row_dec_days_interval
        df.at[row_index, 'DEC_4W_SPEED'] = dec_amount / row_dec_days_interval
    
    
# Fill the inc/dec columns row by row. The function writes into df itself,
# so the value returned by apply is not used.
df.apply(make_inc_dec_speed, axis = 1, args = (df,))

df.head(50)



In [None]:
# Print a summary of the final frame (columns, dtypes, non-null counts).
df.info()