In [None]:
import pandas as pd
import numpy as np
from zipfile import ZipFile

In [None]:
# Download the challenge test archive from GitHub and unpack it.
# `wget -nc` skips the download when the archive is already present locally.
# `7z x ... -aos` extracts with full paths and skips files that already exist.
# NOTE(review): the trailing folder argument looks like a filter selecting which
# archive entries to extract rather than an output directory — confirm against the 7-Zip docs.
!wget -nc https://github.com/Turkcell/ITU-AIMLin5GChallenge-2021/raw/main/RLF_Prediction_ITU_AIML_Challenge_Data/RLF_Prediction_ITU_AIML_Challenge_Test_20210125.7z
!7z x RLF_Prediction_ITU_AIML_Challenge_Test_20210125.7z -aos RLF_Prediction_ITU_AIML_Challenge_Test_20210125/


File ‘RLF_Prediction_ITU_AIML_Challenge_Test_20210125.7z’ already there; not retrieving.


7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.30GHz (306F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan         1 file, 4819837 bytes (4707 KiB)

Extracting archive: RLF_Prediction_ITU_AIML_Challenge_Test_20210125.7z
--
Path = RLF_Prediction_ITU_AIML_Challenge_Test_20210125.7z
Type = 7z
Physical Size = 4819837
Headers Size = 277
Method = LZMA2:24
Solid = +
Blocks = 1

  0%     60% 2 . RLF_Prediction_ITU_AIML_Challeng . 210125/RegionA_test_20210125.zip                                                                            Everything is Ok

Folders: 1
Files: 2
Size:       34799058
Compr

In [None]:
# Read one tab-separated table out of a zip archive
def read_table_from_zip(zip_path, table_name):
    """Load a TSV member of a zip archive into a DataFrame.

    Args:
        zip_path: Path of the zip archive.
        table_name: Name of the tab-separated member inside the archive.

    Returns:
        The member parsed with its first column as the index. When the table
        has a "datetime" column, that column is converted with pd.to_datetime.
    """
    with ZipFile(zip_path) as archive, archive.open(table_name) as handle:
        table = pd.read_csv(handle, sep="\t", index_col=0)

    # Tables without a "datetime" column are returned exactly as parsed.
    if "datetime" not in table:
        return table

    table["datetime"] = pd.to_datetime(table["datetime"])
    return table

In [None]:
# Archive holding the Region A test tables (relative to the working directory)
data_zip_path = "./RLF_Prediction_ITU_AIML_Challenge_Test_20210125/RegionA_test_20210125.zip"

# Load the radio-link KPI table, then report its size and the date range it covers
validation_rl_kpis = read_table_from_zip(data_zip_path, "rl-kpis.tsv")
kpi_dates = validation_rl_kpis["datetime"]
print(f"validation_rl_kpis.shape: {validation_rl_kpis.shape}")
print(kpi_dates.min(), kpi_dates.max())
validation_rl_kpis.tail(5)

validation_rl_kpis.shape: (50978, 19)
2021-01-25 00:00:00 2021-02-13 00:00:00


Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf
50973,NEC,2021-02-13,NEAR,A6FD,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False
50974,NEC,2021-02-13,NEAR,A6NA,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False
50975,NEC,2021-02-13,FAR,A8FJ,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False
50976,NEC,2021-02-13,FAR,A8HV,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False
50977,NEC,2021-02-13,NEAR,A4ZO,1348886,RL_bLTQH,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-38.3,0.0,495,2048QAM*,False


In [None]:
# rl_kpis is a second name for the same DataFrame object (no copy is made)
rl_kpis=validation_rl_kpis

In [None]:
rl_kpis.shape

(50978, 19)

# Prepare Labels

In [None]:
# Number of following days inspected when labelling a radio-link failure (RLF)
prediction_interval = 5

# Take an explicit copy: adding the T+n columns to a plain column selection of
# rl_kpis is what makes pandas emit SettingWithCopyWarning.
df_labels = rl_kpis[["datetime", "site_id", "mlid"]].copy()

# Prepare columns for the following days. We will join data with these columns to find RLF
for i in range(prediction_interval):
  df_labels[f"T+{i+1}"] = df_labels["datetime"] + pd.DateOffset(days=i+1)

rl_kpis_view = rl_kpis[["datetime", "site_id", "mlid", "rlf"]]
for i in range(prediction_interval):
  target_day_column_name = f"T+{i+1}"

  # Rename the right-hand columns before merging so that each pass adds exactly
  # one new column (T+n_rlf). Joining on differently named date columns would
  # leave a "datetime_y" column behind on every pass, which collides with the
  # one from the previous pass and is rejected by newer pandas versions.
  rlf_on_target_day = rl_kpis_view.rename(columns={"datetime": target_day_column_name,
                                                   "rlf": f"{target_day_column_name}_rlf"})
  df_labels = df_labels.merge(rlf_on_target_day,
                  how = "left",
                  on = ["site_id", "mlid", target_day_column_name])

# 1 day predict is equal to T+1 rlf
# (left unset for rows whose following day has no KPI record)
df_labels["1-day-predict"] = df_labels["T+1_rlf"]

# Interval predict (5-day predict) is based on T+1, T+2, T+3, T+4 and T+5
following_days_rlf_columns = [f"T+{i+1}_rlf" for i in range(prediction_interval)]

df_labels["5-day-predict"] = df_labels[following_days_rlf_columns].any(axis=1)
df_labels = df_labels[["datetime", "site_id", "mlid", "1-day-predict", "5-day-predict"]]

print(f"df_labels.shape: {df_labels.shape}")
print(f"df_labels 1-day rlf sum: {df_labels['1-day-predict'].sum()}")
print(f"df_labels 5-day rlf sum: {df_labels['5-day-predict'].sum()}")
df_labels.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


df_labels.shape: (50978, 5)
df_labels 1-day rlf sum: 8
df_labels 5-day rlf sum: 35


Unnamed: 0,datetime,site_id,mlid,1-day-predict,5-day-predict
0,2021-01-25,RL_;ABDV,A0BE,False,False
1,2021-01-25,RL_;ABDV,A0BI,False,False
2,2021-01-25,RL_;ABDV,A5AB,False,False
3,2021-01-25,RL_;ABDV,A8CQ,False,False
4,2021-01-25,RL_;ABDV,A8DQ,False,False


In [None]:
# Attach the 1-day and 5-day labels to the KPI rows; the left join keeps every KPI row
label_keys = ["datetime", "site_id", "mlid"]
rl_kpis_with_labels = pd.merge(rl_kpis, df_labels, how="left", on=label_keys)
rl_kpis_with_labels.head()

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf,1-day-predict,5-day-predict
0,ENK,2021-01-25,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-33.2,0.0,456,1024QAM,False,False,False
1,ENK,2021-01-25,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-32.9,0.0,456,1024QAM,False,False,False
2,ENK,2021-01-25,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,False,False
3,NEC,2021-01-25,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False
4,NEC,2021-01-25,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False


In [None]:
rl_kpis_with_labels.shape

(50978, 21)

In [None]:
# Read the meteorological forecast table (met-forecast.tsv)
met_forecast = read_table_from_zip(data_zip_path, "met-forecast.tsv")
met_forecast.tail(5)

Unnamed: 0,station_no,datetime,report_time,weather_day1,temp_max_day1,temp_min_day1,humidity_max_day1,humidity_min_day1,wind_dir_day1,wind_speed_day1,weather_day2,temp_max_day2,temp_min_day2,humidity_max_day2,humidity_min_day2,wind_dir_day2,wind_speed_day2,weather_day3,temp_max_day3,temp_min_day3,humidity_max_day3,humidity_min_day3,wind_dir_day3,wind_speed_day3,weather_day4,temp_max_day4,temp_min_day4,humidity_max_day4,humidity_min_day4,wind_dir_day4,wind_speed_day4,weather_day5,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5
15,WS_18403,2021-02-08,morning,overcast clouds,17,13,79.0,61.0,220.0,41.0,rain,17,13,86.0,64.0,213.0,30.0,overcast clouds,19,12,78.0,48.0,207.0,27.0,rain,8,7,91.0,48.0,215.0,34.0,overcast clouds,3,1,61.0,53.0,29.0,20.0
16,WS_18404,2021-02-08,evening,overcast clouds,18,13,80.0,65.0,212.0,43.0,rain,18,13,86.0,67.0,213.0,33.0,overcast clouds,20,12,75.0,51.0,205.0,31.0,rain,10,9,91.0,49.0,212.0,36.0,overcast clouds,5,1,63.0,54.0,359.0,23.0
17,WS_18404,2021-02-08,morning,overcast clouds,18,13,80.0,65.0,212.0,43.0,rain,18,13,86.0,67.0,213.0,33.0,overcast clouds,20,12,75.0,51.0,205.0,31.0,rain,10,9,91.0,49.0,212.0,36.0,overcast clouds,5,1,63.0,54.0,359.0,23.0
18,WS_19111,2021-02-08,evening,overcast clouds,18,11,,,,,rain,18,10,,,,,overcast clouds,19,10,,,,,rain,9,8,,,,,overcast clouds,3,0,,,,
19,WS_19111,2021-02-08,morning,overcast clouds,18,11,,,,,rain,18,10,,,,,overcast clouds,19,10,,,,,rain,9,8,,,,,overcast clouds,3,0,,,,


In [None]:
met_forecast

Unnamed: 0,station_no,datetime,report_time,weather_day1,temp_max_day1,temp_min_day1,humidity_max_day1,humidity_min_day1,wind_dir_day1,wind_speed_day1,weather_day2,temp_max_day2,temp_min_day2,humidity_max_day2,humidity_min_day2,wind_dir_day2,wind_speed_day2,weather_day3,temp_max_day3,temp_min_day3,humidity_max_day3,humidity_min_day3,wind_dir_day3,wind_speed_day3,weather_day4,temp_max_day4,temp_min_day4,humidity_max_day4,humidity_min_day4,wind_dir_day4,wind_speed_day4,weather_day5,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5
0,WS_17062,2021-02-08,evening,overcast clouds,17,13,79.0,64.0,219.0,44.0,rain,17,12,84.0,66.0,214.0,33.0,overcast clouds,19,11,77.0,51.0,206.0,31.0,rain,9,8,90.0,48.0,214.0,38.0,overcast clouds,4,2,63.0,54.0,31.0,24.0
1,WS_17062,2021-02-08,morning,overcast clouds,17,13,79.0,64.0,219.0,44.0,rain,17,12,84.0,66.0,214.0,33.0,overcast clouds,19,11,77.0,51.0,206.0,31.0,rain,9,8,90.0,48.0,214.0,38.0,overcast clouds,4,2,63.0,54.0,31.0,24.0
2,WS_17063,2021-02-08,evening,overcast clouds,19,12,73.0,54.0,227.0,37.0,rain,17,11,81.0,63.0,232.0,29.0,overcast clouds,19,9,83.0,44.0,209.0,17.0,rain,9,8,81.0,46.0,218.0,29.0,overcast clouds,4,1,63.0,52.0,34.0,22.0
3,WS_17063,2021-02-08,morning,overcast clouds,19,12,73.0,54.0,227.0,37.0,rain,17,11,81.0,63.0,232.0,29.0,overcast clouds,19,9,83.0,44.0,209.0,17.0,rain,9,8,81.0,46.0,218.0,29.0,overcast clouds,4,1,63.0,52.0,34.0,22.0
4,WS_17064,2021-02-08,evening,overcast clouds,18,11,77.0,60.0,225.0,43.0,rain,18,12,81.0,64.0,229.0,32.0,overcast clouds,20,10,82.0,47.0,210.0,25.0,rain,9,8,85.0,47.0,218.0,35.0,overcast clouds,5,3,64.0,54.0,32.0,23.0
5,WS_17064,2021-02-08,morning,overcast clouds,18,11,77.0,60.0,225.0,43.0,rain,18,12,81.0,64.0,229.0,32.0,overcast clouds,20,10,82.0,47.0,210.0,25.0,rain,9,8,85.0,47.0,218.0,35.0,overcast clouds,5,3,64.0,54.0,32.0,23.0
6,WS_17065,2021-02-08,evening,overcast clouds,18,12,77.0,57.0,224.0,39.0,rain,17,10,85.0,61.0,227.0,29.0,overcast clouds,20,9,82.0,44.0,211.0,23.0,rain,9,8,90.0,48.0,218.0,31.0,overcast clouds,5,1,62.0,53.0,29.0,18.0
7,WS_17065,2021-02-08,morning,overcast clouds,18,12,77.0,57.0,224.0,39.0,rain,17,10,85.0,61.0,227.0,29.0,overcast clouds,20,9,82.0,44.0,211.0,23.0,rain,9,8,90.0,48.0,218.0,31.0,overcast clouds,5,1,62.0,53.0,29.0,18.0
8,WS_17610,2021-02-08,evening,overcast clouds,20,13,72.0,50.0,227.0,45.0,rain,18,11,82.0,58.0,232.0,33.0,overcast clouds,20,9,78.0,47.0,202.0,23.0,rain,10,7,81.0,53.0,222.0,33.0,overcast clouds,5,0,61.0,56.0,341.0,22.0
9,WS_17610,2021-02-08,morning,overcast clouds,20,13,72.0,50.0,227.0,45.0,rain,18,11,82.0,58.0,232.0,33.0,overcast clouds,20,9,78.0,47.0,202.0,23.0,rain,10,7,81.0,53.0,222.0,33.0,overcast clouds,5,0,61.0,56.0,341.0,22.0


In [None]:
# Keep the identifying columns plus every column that describes forecast day 5
id_columns = ['station_no', 'datetime', 'report_time']
to_drop = [c for c in met_forecast.columns if 'day5' not in c and c not in id_columns]
met_forecast_v2 = met_forecast.drop(columns=to_drop)
met_forecast_v2.head()

Unnamed: 0,station_no,datetime,report_time,weather_day5,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5
0,WS_17062,2021-02-08,evening,overcast clouds,4,2,63.0,54.0,31.0,24.0
1,WS_17062,2021-02-08,morning,overcast clouds,4,2,63.0,54.0,31.0,24.0
2,WS_17063,2021-02-08,evening,overcast clouds,4,1,63.0,52.0,34.0,22.0
3,WS_17063,2021-02-08,morning,overcast clouds,4,1,63.0,52.0,34.0,22.0
4,WS_17064,2021-02-08,evening,overcast clouds,5,3,64.0,54.0,32.0,23.0


In [None]:
# Average the numeric day-5 forecast values per station and date.
# numeric_only=True is required because the frame also holds text columns
# (report_time, weather_day5); without it, pandas 2.x raises a TypeError
# instead of silently leaving those columns out.
mean_values = (
    met_forecast_v2
    .groupby(by=['station_no', 'datetime'])
    .mean(numeric_only=True)
    .reset_index()
)
mean_values.sort_values(by=['station_no', 'datetime']).head()

Unnamed: 0,station_no,datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5
0,WS_17062,2021-02-08,4,2,63.0,54.0,31.0,24.0
1,WS_17063,2021-02-08,4,1,63.0,52.0,34.0,22.0
2,WS_17064,2021-02-08,5,3,64.0,54.0,32.0,23.0
3,WS_17065,2021-02-08,5,1,62.0,53.0,29.0,18.0
4,WS_17610,2021-02-08,5,0,61.0,56.0,341.0,22.0


In [None]:
# Split off the non-numeric columns (identifiers, date, text descriptions).
# select_dtypes(include="number") is used instead of describe().columns because
# newer pandas versions also summarise datetime columns in describe(), which
# would remove `datetime` from non_num_df even though it is needed as a merge key.
numerical_values = met_forecast_v2.select_dtypes(include="number").columns
non_num_df = met_forecast_v2.drop(columns=numerical_values)
non_num_df.head()

Unnamed: 0,station_no,datetime,report_time,weather_day5
0,WS_17062,2021-02-08,evening,overcast clouds
1,WS_17062,2021-02-08,morning,overcast clouds
2,WS_17063,2021-02-08,evening,overcast clouds
3,WS_17063,2021-02-08,morning,overcast clouds
4,WS_17064,2021-02-08,evening,overcast clouds


In [None]:
# Recombine the per-station/date numeric means with the non-numeric columns
forecast_keys = ['station_no', 'datetime']
modified_forecast_df = mean_values.merge(non_num_df, on=forecast_keys)
modified_forecast_df.head()

Unnamed: 0,station_no,datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,report_time,weather_day5
0,WS_17062,2021-02-08,4,2,63.0,54.0,31.0,24.0,evening,overcast clouds
1,WS_17062,2021-02-08,4,2,63.0,54.0,31.0,24.0,morning,overcast clouds
2,WS_17063,2021-02-08,4,1,63.0,52.0,34.0,22.0,evening,overcast clouds
3,WS_17063,2021-02-08,4,1,63.0,52.0,34.0,22.0,morning,overcast clouds
4,WS_17064,2021-02-08,5,3,64.0,54.0,32.0,23.0,evening,overcast clouds


In [None]:
modified_forecast_df.shape

(20, 10)

In [None]:
# Without report_time the morning/evening rows become identical; keep the first of each
modified_forecast_df = (
    modified_forecast_df
    .drop(columns='report_time')
    .drop_duplicates(keep='first')
)

In [None]:
modified_forecast_df.shape

(10, 9)

In [None]:
# Renumber the rows from 0; the previous row labels are kept in a new 'index' column
modified_forecast_df = modified_forecast_df.reset_index()
modified_forecast_df

Unnamed: 0,index,station_no,datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5
0,0,WS_17062,2021-02-08,4,2,63.0,54.0,31.0,24.0,overcast clouds
1,2,WS_17063,2021-02-08,4,1,63.0,52.0,34.0,22.0,overcast clouds
2,4,WS_17064,2021-02-08,5,3,64.0,54.0,32.0,23.0,overcast clouds
3,6,WS_17065,2021-02-08,5,1,62.0,53.0,29.0,18.0,overcast clouds
4,8,WS_17610,2021-02-08,5,0,61.0,56.0,341.0,22.0,overcast clouds
5,10,WS_18100,2021-02-08,7,1,63.0,52.0,39.0,24.0,overcast clouds
6,12,WS_18397,2021-02-08,4,0,61.0,54.0,28.0,17.0,overcast clouds
7,14,WS_18403,2021-02-08,3,1,61.0,53.0,29.0,20.0,overcast clouds
8,16,WS_18404,2021-02-08,5,1,63.0,54.0,359.0,23.0,overcast clouds
9,18,WS_19111,2021-02-08,3,0,,,,,overcast clouds


In [None]:
# Discard the 'index' column holding the old row labels
modified_forecast_df = modified_forecast_df.drop(columns='index')

In [None]:
modified_forecast_df

Unnamed: 0,station_no,datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5
0,WS_17062,2021-02-08,4,2,63.0,54.0,31.0,24.0,overcast clouds
1,WS_17063,2021-02-08,4,1,63.0,52.0,34.0,22.0,overcast clouds
2,WS_17064,2021-02-08,5,3,64.0,54.0,32.0,23.0,overcast clouds
3,WS_17065,2021-02-08,5,1,62.0,53.0,29.0,18.0,overcast clouds
4,WS_17610,2021-02-08,5,0,61.0,56.0,341.0,22.0,overcast clouds
5,WS_18100,2021-02-08,7,1,63.0,52.0,39.0,24.0,overcast clouds
6,WS_18397,2021-02-08,4,0,61.0,54.0,28.0,17.0,overcast clouds
7,WS_18403,2021-02-08,3,1,61.0,53.0,29.0,20.0,overcast clouds
8,WS_18404,2021-02-08,5,1,63.0,54.0,359.0,23.0,overcast clouds
9,WS_19111,2021-02-08,3,0,,,,,overcast clouds


In [None]:
modified_forecast_df.shape

(10, 9)

In [None]:
# Read the distance table (distances.tsv); its rows and columns are labelled
# with weather-station (WS_*) and site (RL_*) identifiers
distances = read_table_from_zip(data_zip_path, "distances.tsv")
distances.tail(5)

Unnamed: 0,WS_19111,WS_17047,WS_18397,WS_17062,WS_17813,WS_17064,WS_19112,WS_17063,WS_18736,WS_17065,WS_18399,WS_17610,WS_18792,WS_18735,WS_18100,WS_17437,WS_17448,WS_18403,WS_18404,WS_17389,RL_U7MPL,RL_X;ORF,RL_X;OE?,RL_\K[RM,RL_UBUGK,RL_X;OQA,RL_\K[LJ,RL_L=SKK,RL_\K[QH,RL_\K[EJ,RL_\K[EH,RL_KMLM0,RL_KMBTO,RL_KMPPE,RL_[EDCP,RL_K=LMV,RL_SEI?I,RL_IBLMM,RL_JETKO,RL_b;ZI>,...,RL_[KBQ>,RL_S7EPE,RL_P7ZEQ,RL_S7ECS,RL_JETRB,RL_S7EA>,RL_UBUQQ,RL_S7EWB,RL_S7EL>,RL_SKDRB,RL_I9UCH,RL_K7EKV,RL_\;EFV,RL_S?[@@,RL_POOSQ,RL_JETQ@,RL_I9JER,RL_UENSQ,RL_SE[Q@,RL_\;EDF,RL_SHUIL,RL_N8BJ>,RL_[7TIQ,RL_U?OCQ,RL_X;O@Q,RL_SHU@L,RL_S:ISI,RL_J7DG?,RL_JET@P,RL_N;OEL,RL_]=UC>,RL_]ITCK,RL_[KBDF,RL_X;O@O,RL_b?LMI,RL_]IC?O,RL_S:PQD,RL_JEZWL,RL_LKEII,RL_[EOMJ
RL_]IC?O,41.30192,56.87341,21.6502,2.99772,3.99696,13.3232,16.15438,22.39963,22.14982,13.23993,17.90305,42.96732,38.72055,57.62284,25.73043,23.06579,26.89621,7.57757,1.58213,9.9924,10.57529,22.23309,31.6426,27.22929,10.65856,18.65248,29.64412,44.88253,25.23081,23.89849,24.73119,17.98632,1.83194,24.981,2.4981,3.16426,6.24525,1.41559,7.57757,0.74943,...,6.49506,2.41483,1.6654,2.41483,7.16122,2.58137,11.49126,2.33156,1.74867,7.99392,1.6654,5.16274,1.83194,3.58061,1.41559,8.16046,1.33232,3.3308,5.99544,0.8327,16.32092,4.24677,6.07871,4.91293,19.40191,17.32016,3.3308,0.24981,7.4943,3.99696,15.15514,1.41559,6.6616,19.48518,2.83118,0.0,2.16502,7.07795,11.07491,2.58137
RL_S:PQD,41.46846,57.53957,22.81598,0.91597,2.74791,12.24069,15.73803,21.98328,22.73271,13.48974,18.48594,44.21637,39.38671,58.45554,24.81446,22.23309,25.98024,8.66008,3.66388,8.07719,9.65932,21.81674,31.89241,27.06275,9.90913,18.06959,29.39431,45.29888,24.56465,22.98252,23.89849,20.06807,0.99924,24.64792,0.66616,5.24601,5.74563,3.41407,6.49506,1.6654,...,5.16274,0.24981,0.66616,0.99924,6.41179,0.41635,10.32548,0.24981,0.58289,6.99468,1.58213,3.74715,0.74943,1.91521,0.91597,7.07795,2.08175,1.24905,5.49582,2.91445,15.32168,2.66464,4.74639,4.08023,18.48594,16.48746,1.24905,1.91521,6.32852,2.41483,14.82206,2.74791,5.57909,18.56921,1.33232,2.16502,0.0,6.32852,11.40799,1.16578
RL_JEZWL,47.79698,63.78482,18.98556,6.41179,3.58061,6.24525,9.49278,15.65476,17.4867,8.07719,13.40647,40.30268,34.30724,53.62588,18.65248,16.07111,19.90153,6.32852,7.41103,11.57453,3.49734,15.57149,26.06351,20.90077,3.66388,11.74107,23.14906,39.71979,18.3194,16.82054,17.65324,21.73347,7.32776,18.40267,5.66236,8.16046,1.08251,7.16122,1.33232,7.32776,...,2.08175,6.24525,6.82814,5.32928,0.08327,6.16198,4.57985,6.07871,6.07871,1.24905,5.41255,3.16426,6.99468,4.66312,6.99468,1.6654,5.74563,6.16198,1.16578,7.74411,9.32624,3.99696,2.33156,2.24829,12.32396,10.24221,5.8289,6.91141,1.41559,4.1635,8.57681,8.41027,1.49886,12.40723,4.9962,7.07795,6.32852,0.0,6.32852,5.16274
RL_LKEII,52.29356,67.4487,12.82358,11.90761,9.24297,7.82738,6.49506,12.24069,11.40799,2.16502,7.16122,33.97416,28.14526,47.38063,17.15362,14.48898,18.56921,4.74639,10.57529,17.81978,6.24525,11.82434,20.56769,16.40419,5.66236,9.40951,18.90229,33.97416,15.73803,15.65476,15.90457,19.73499,12.15742,14.48898,10.90837,10.24221,6.07871,10.40875,7.32776,11.6578,...,8.41027,11.40799,11.74107,10.49202,6.24525,11.49126,7.74411,11.24145,10.99164,6.91141,9.9924,9.24297,11.90761,10.32548,11.74107,7.32776,9.90913,11.74107,6.32852,11.49126,9.65932,9.82586,8.57681,7.74411,11.40799,9.40951,11.40799,11.07491,7.4943,9.90913,5.57909,12.40723,7.82738,11.6578,10.24221,11.07491,11.40799,6.32852,0.0,10.32548
RL_[EOMJ,42.63424,58.62208,21.98328,1.49886,1.6654,11.15818,14.65552,20.8175,21.73347,12.4905,17.4867,43.38367,38.47074,57.53957,23.73195,21.15058,24.89773,7.91065,3.83042,8.49354,8.57681,20.73423,30.8099,25.98024,8.74335,16.98708,28.3118,44.29964,23.48214,21.81674,22.73271,20.15134,2.16502,23.56541,0.49962,5.32928,4.66312,3.49734,5.41255,2.41483,...,4.08023,1.08251,1.6654,0.16654,5.32928,1.08251,9.24297,0.91597,0.99924,5.91217,1.16578,2.74791,1.83194,0.99924,1.83194,5.99544,1.83194,1.49886,4.33004,3.41407,14.23917,1.74867,3.66388,2.99772,17.32016,15.32168,1.24905,2.33156,5.24601,1.49886,13.65628,3.58061,4.41331,17.40343,0.24981,2.58137,1.16578,5.16274,10.32548,0.0


In [None]:
# Read the weather-station table (met-stations.tsv)
met_stations = read_table_from_zip(data_zip_path, "met-stations.tsv")
met_stations.tail(5)

Unnamed: 0,station_no,height,clutter_class
15,WS_18735,6,DENSE TREE
16,WS_18736,70,SPARSE TREE
17,WS_18792,256,OPEN LAND
18,WS_19111,78,OPEN IN URBAN
19,WS_19112,330,DENSE TREE


In [None]:
# Plain Python list of the station identifiers found in met_stations
stations = met_stations['station_no'].tolist()

In [None]:
# To find nearest station
def find_nearest_stations(site_id: str, distances: pd.DataFrame, 
                          stations: list, k: int = 1) -> list:
  """Return the labels of the k stations closest to a site.

  Args:
    site_id: Column of `distances` holding the distances measured from the site.
    distances: Distance table whose index labels include the station identifiers.
    stations: Station identifiers that may be returned; all other index labels
      are ignored.
    k: Maximum number of stations to return.

  Returns:
    A list of at most k index labels, nearest first. The list is empty when
    none of the index labels is in `stations`.
  """
  by_distance = distances[[site_id]].sort_values(by=[site_id])
  # A single vectorised membership test replaces a Python-level scan of
  # `stations` for every index label.
  is_station = by_distance.index.isin(stations)
  return list(by_distance[is_station].head(k).index)

In [None]:
# Getting nearest station (just 1) based on the antennas.
# The lookup depends only on site_id, so it is done once per distinct site and
# then mapped onto the rows instead of being repeated for every KPI row.
nearest_station_by_site = {
    site_id: find_nearest_stations(site_id, distances, stations)[0]
    for site_id in rl_kpis_with_labels['site_id'].unique()
}
rl_kpis_with_labels['nearest_station'] = rl_kpis_with_labels['site_id'].map(nearest_station_by_site)

In [None]:
rl_kpis_with_labels

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station
0,ENK,2021-01-25,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-33.2,0.0,456,1024QAM,False,False,False,WS_18403
1,ENK,2021-01-25,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-32.9,0.0,456,1024QAM,False,False,False,WS_18403
2,ENK,2021-01-25,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,False,False,WS_18403
3,NEC,2021-01-25,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,WS_18403
4,NEC,2021-01-25,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,WS_18403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50973,NEC,2021-02-13,NEAR,A6FD,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False,,False,WS_17064
50974,NEC,2021-02-13,NEAR,A6NA,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False,,False,WS_17064
50975,NEC,2021-02-13,FAR,A8FJ,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False,,False,WS_17064
50976,NEC,2021-02-13,FAR,A8HV,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False,,False,WS_17064


In [None]:
type(rl_kpis_with_labels['datetime'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [None]:
# Read the radio-link site table (rl-sites.tsv)
rl_sites = read_table_from_zip(data_zip_path, "rl-sites.tsv")
rl_sites.tail(5)

Unnamed: 0,site_id,groundheight,clutter_class
1406,RL_bKNAP,87.9808,LOW-DENSE URBAN
1407,RL_bKNLF,98.9881,AVERAGE-DENSE URBAN
1408,RL_bKNQF,148.9506,SPARSE TREE
1409,RL_bKZCQ,66.9809,HIGH-DENSE URBAN
1410,RL_bLTQH,39.97,AVERAGE-MEDIUM URBAN


In [None]:
# Move each forecast's date forward by five days, i.e. to the day the day-5 values refer to
modified_forecast_df['datetime'] = pd.to_datetime(modified_forecast_df['datetime']) + pd.Timedelta(days=5)

In [None]:
modified_forecast_df

Unnamed: 0,station_no,datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5
0,WS_17062,2021-02-13,4,2,63.0,54.0,31.0,24.0,overcast clouds
1,WS_17063,2021-02-13,4,1,63.0,52.0,34.0,22.0,overcast clouds
2,WS_17064,2021-02-13,5,3,64.0,54.0,32.0,23.0,overcast clouds
3,WS_17065,2021-02-13,5,1,62.0,53.0,29.0,18.0,overcast clouds
4,WS_17610,2021-02-13,5,0,61.0,56.0,341.0,22.0,overcast clouds
5,WS_18100,2021-02-13,7,1,63.0,52.0,39.0,24.0,overcast clouds
6,WS_18397,2021-02-13,4,0,61.0,54.0,28.0,17.0,overcast clouds
7,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
8,WS_18404,2021-02-13,5,1,63.0,54.0,359.0,23.0,overcast clouds
9,WS_19111,2021-02-13,3,0,,,,,overcast clouds


In [None]:
# Give the forecast key columns the names used on the KPI side of the join
forecast_key_names = {'datetime': 'forecast_datetime', 'station_no': 'nearest_station'}
modified_forecast_df = modified_forecast_df.rename(columns=forecast_key_names)

In [None]:
## KPI Historical (one day)
# Site attributes attached to every historical KPI row
site_columns = ['site_id', 'groundheight', 'clutter_class']

rl_kpis_history = (
    rl_kpis_with_labels
    # The nearest station is not part of the history features.
    .drop(columns=['nearest_station'])
    # Make sure the dates are timestamps.
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    # Add the site data.
    .merge(rl_sites[site_columns], on='site_id')
    # Prefix every column so the names stay distinct in the later self-join.
    .add_prefix('history_')
)

rl_kpis_history

Unnamed: 0,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_scalibility_score,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_5-day-predict,history_groundheight,history_clutter_class
0,ENK,2021-01-25,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-33.2,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
1,ENK,2021-01-25,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-32.9,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
2,ENK,2021-01-25,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,False,False,107.9658,OPEN LAND
3,NEC,2021-01-25,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
4,NEC,2021-01-25,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50973,NEC,2021-02-05,NEAR,A4XD,236782,RL_U;OBF,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-38.0,0.0,222,1024 QAM,False,,False,1.0000,OPEN LAND
50974,NEC,2021-02-10,NEAR,A4PB,236782,RL_U;OBF,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-36.3,0.0,222,1024 QAM,False,False,False,1.0000,OPEN LAND
50975,NEC,2021-02-10,NEAR,A4XD,236782,RL_U;OBF,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-36.3,0.0,222,1024 QAM,False,False,False,1.0000,OPEN LAND
50976,NEC,2021-02-11,NEAR,A4PB,236782,RL_U;OBF,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-36.6,0.0,222,1024 QAM,False,,False,1.0000,OPEN LAND


In [None]:
rl_kpis_with_labels['datetime'][0]-pd.Timedelta(days=1)

Timestamp('2021-01-24 00:00:00')

In [None]:
# Add the key column used to join the weather forecast.
# forecast_datetime is set to the same value as the KPI datetime: this cell
# applies no one-day offset.
# NOTE(review): the previous comment here read "Forecast datetime should be - 1 day
# from the kpis datetime" — confirm which of the two is intended.
from datetime import datetime
rl_kpis_with_labels['datetime'] = [pd.Timestamp(x) for x in rl_kpis_with_labels['datetime']]
rl_kpis_with_labels['forecast_datetime'] = [x for x in rl_kpis_with_labels['datetime']]

In [None]:
rl_kpis_with_labels

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime
0,ENK,2021-01-25,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-33.2,0.0,456,1024QAM,False,False,False,WS_18403,2021-01-25
1,ENK,2021-01-25,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-32.9,0.0,456,1024QAM,False,False,False,WS_18403,2021-01-25
2,ENK,2021-01-25,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,False,False,WS_18403,2021-01-25
3,NEC,2021-01-25,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,WS_18403,2021-01-25
4,NEC,2021-01-25,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,0.0,86400,0.0,-40.4,0.0,247,2048QAM*,False,False,False,WS_18403,2021-01-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50973,NEC,2021-02-13,NEAR,A6FD,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False,,False,WS_17064,2021-02-13
50974,NEC,2021-02-13,NEAR,A6NA,1371370,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-37.5,0.0,495,2048QAM*,False,,False,WS_17064,2021-02-13
50975,NEC,2021-02-13,FAR,A8FJ,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False,,False,WS_17064,2021-02-13
50976,NEC,2021-02-13,FAR,A8HV,1371440,RL_bKZCQ,cardtype5,Enable,f3,0,0,0.0,86400,0.0,-28.9,0.0,495,2048QAM*,False,,False,WS_17064,2021-02-13


In [None]:
# Join each KPI row with the forecast for its nearest station and date.
# pd.merge defaults to an inner join, so KPI rows without a matching
# (nearest_station, forecast_datetime) forecast row are dropped here.
# validate='m:m' performs no uniqueness check on either side.
merged_df = pd.merge(rl_kpis_with_labels, modified_forecast_df, 
                      on=['nearest_station','forecast_datetime'],
                      validate='m:m')
merged_df

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5
0,ENK,2021-02-13,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-31.0,0.0,456,1024QAM,False,,False,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
1,ENK,2021-02-13,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-30.7,0.0,456,1024QAM,False,,False,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
2,ENK,2021-02-13,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,,False,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
3,NEC,2021-02-13,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,0.0,247,2048QAM*,False,,False,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
4,NEC,2021-02-13,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,0.0,247,2048QAM*,False,,False,WS_18403,2021-02-13,3,1,61.0,53.0,29.0,20.0,overcast clouds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1829,ENK,2021-02-13,FAR,A1NX,338527,RL_\K[WE,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,95,512QAM,False,,False,WS_18100,2021-02-13,7,1,63.0,52.0,39.0,24.0,overcast clouds
1830,ENK,2021-02-13,FAR,A8KW,1386559,RL_\K[WE,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM,False,,False,WS_18100,2021-02-13,7,1,63.0,52.0,39.0,24.0,overcast clouds
1831,ENK,2021-02-13,FAR,A1RU,336465,RL_\K[WV,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM(QO),False,,False,WS_18100,2021-02-13,7,1,63.0,52.0,39.0,24.0,overcast clouds
1832,ENK,2021-02-13,FAR,A8GS,1362262,RL_\K[WV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM,False,,False,WS_18100,2021-02-13,7,1,63.0,52.0,39.0,24.0,overcast clouds


In [None]:
# Save the merged frame as CSV.
# NOTE(review): absolute path under /content/drive — presumably a mounted Google
# Drive in Colab; the mount step is not visible in this part of the notebook.
merged_df.to_csv('/content/drive/MyDrive/FYP/sample_data.csv')

In [None]:
# Re-point forecast_datetime at the day before each KPI row; it is the key for the history join
merged_df['forecast_datetime'] = merged_df['datetime'] - pd.Timedelta(days=1)

In [None]:
# Attach the KPI row of the same mlid from the day held in forecast_datetime
# (the history_* columns). merge defaults to an inner join, so rows without a
# KPI record for that mlid on that day are dropped.
# NOTE(review): the join key is mlid alone, not site_id — confirm mlid is unique across sites.
merged_df = merged_df.merge(rl_kpis_history,left_on=['mlid','forecast_datetime'],right_on=['history_mlid','history_datetime'],validate='m:m')
merged_df

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,scalibility_score,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_scalibility_score,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_5-day-predict,history_groundheight,history_clutter_class
0,ENK,2021-02-13,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-31.0,0.0,456,1024QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-31.0,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
1,ENK,2021-02-13,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-30.7,0.0,456,1024QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-30.4,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
2,ENK,2021-02-13,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,0.0,406,512QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.4,0.0,406,512QAM,False,False,False,107.9658,OPEN LAND
3,NEC,2021-02-13,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,0.0,247,2048QAM*,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,NEC,2021-02-12,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-98.7,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
4,NEC,2021-02-13,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,0.0,247,2048QAM*,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,NEC,2021-02-12,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-98.7,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,ENK,2021-02-13,FAR,A1NX,338527,RL_\K[WE,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,95,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A1NX,338527,RL_\K[WE,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,95,512QAM,False,False,False,17.0185,OPEN IN URBAN
1816,ENK,2021-02-13,FAR,A8KW,1386559,RL_\K[WE,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A8KW,1386559,RL_\K[WE,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM,False,False,False,17.0185,OPEN IN URBAN
1817,ENK,2021-02-13,FAR,A1RU,336465,RL_\K[WV,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM(QO),False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A1RU,336465,RL_\K[WV,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM(QO),False,False,False,8.9776,OPEN IN URBAN
1818,ENK,2021-02-13,FAR,A8GS,1362262,RL_\K[WV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A8GS,1362262,RL_\K[WV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM,False,False,False,8.9776,OPEN IN URBAN


In [None]:
# Snapshot the frame again, now including the history_* columns.
# This writes to the same path as the earlier snapshot cell and overwrites it.
# NOTE(review): hard-coded absolute path; presumably requires Google Drive to be
# mounted at /content/drive (Colab) -- confirm.
merged_df.to_csv('/content/drive/MyDrive/FYP/sample_data.csv')

In [None]:
# Remove the current-day scalibility_score column (the history_ counterpart is
# dropped separately further down). Re-running this cell on the already-reduced
# frame raises KeyError because the column is gone.
merged_df=merged_df.drop(columns=['scalibility_score'])

In [None]:
# Display the frame to confirm scalibility_score is no longer present.
merged_df

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_scalibility_score,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_5-day-predict,history_groundheight,history_clutter_class
0,ENK,2021-02-13,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-31.0,456,1024QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A0BE,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-31.0,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
1,ENK,2021-02-13,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-30.7,456,1024QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A0BI,1349988,RL_;ABDV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-30.4,0.0,456,1024QAM,False,False,False,107.9658,OPEN LAND
2,ENK,2021-02-13,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.7,406,512QAM,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,ENK,2021-02-12,FAR,A5AB,1344018,RL_;ABDV,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-34.4,0.0,406,512QAM,False,False,False,107.9658,OPEN LAND
3,NEC,2021-02-13,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,247,2048QAM*,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,NEC,2021-02-12,FAR,A8CQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-98.7,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
4,NEC,2021-02-13,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-99.0,247,2048QAM*,False,,False,WS_18403,2021-02-12,3,1,61.0,53.0,29.0,20.0,overcast clouds,NEC,2021-02-12,FAR,A8DQ,1351204,RL_;ABDV,cardtype5,Enable,f2,0,0,86400.0,86400,0.0,-98.7,0.0,247,2048QAM*,False,False,False,107.9658,OPEN LAND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,ENK,2021-02-13,FAR,A1NX,338527,RL_\K[WE,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-39.8,95,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A1NX,338527,RL_\K[WE,cardtype4,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,95,512QAM,False,False,False,17.0185,OPEN IN URBAN
1816,ENK,2021-02-13,FAR,A8KW,1386559,RL_\K[WE,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,406,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A8KW,1386559,RL_\K[WE,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM,False,False,False,17.0185,OPEN IN URBAN
1817,ENK,2021-02-13,FAR,A1RU,336465,RL_\K[WV,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.8,406,512QAM(QO),False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A1RU,336465,RL_\K[WV,cardtype4,Enable,f5,0,0,0.0,86400,0.0,-39.5,0.0,406,512QAM(QO),False,False,False,8.9776,OPEN IN URBAN
1818,ENK,2021-02-13,FAR,A8GS,1362262,RL_\K[WV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-39.5,406,512QAM,False,,False,WS_18100,2021-02-12,7,1,63.0,52.0,39.0,24.0,overcast clouds,ENK,2021-02-12,FAR,A8GS,1362262,RL_\K[WV,cardtype1,Enable,f3,0,0,0.0,86400,0.0,-39.8,0.0,406,512QAM,False,False,False,8.9776,OPEN IN URBAN


In [None]:
# Inspect column dtypes to see which columns are still object-typed before encoding.
merged_df.dtypes

type                                     object
datetime                         datetime64[ns]
tip                                      object
mlid                                     object
mw_connection_no                          int64
site_id                                  object
card_type                                object
adaptive_modulation                      object
freq_band                                object
severaly_error_second                     int64
error_second                              int64
unavail_second                          float64
avail_time                                int64
bbe                                     float64
rxlevmax                                float64
capacity                                  int64
modulation                               object
rlf                                        bool
1-day-predict                            object
5-day-predict                              bool
nearest_station                         

In [None]:
# List the distinct history_freq_band values; NaN appearing here means the
# column has missing entries that need imputing.
merged_df['history_freq_band'].unique()

array(['f3', 'f2', 'f5', 'f4', nan, 'f1'], dtype=object)

In [None]:
# Names of the columns that contain at least one missing value.
merged_df.columns[merged_df.isna().any()]

Index(['freq_band', '1-day-predict', 'history_freq_band'], dtype='object')

In [None]:
# Number of missing values in each column.
merged_df.isna().sum()

type                                0
datetime                            0
tip                                 0
mlid                                0
mw_connection_no                    0
site_id                             0
card_type                           0
adaptive_modulation                 0
freq_band                          15
severaly_error_second               0
error_second                        0
unavail_second                      0
avail_time                          0
bbe                                 0
rxlevmax                            0
capacity                            0
modulation                          0
rlf                                 0
1-day-predict                    1820
5-day-predict                       0
nearest_station                     0
forecast_datetime                   0
temp_max_day5                       0
temp_min_day5                       0
humidity_max_day5                   0
humidity_min_day5                   0
wind_dir_day

In [None]:
# df is a second name for the SAME DataFrame object, not a copy: in-place
# changes made through df are also visible through merged_df (the null check
# a few cells below reads merged_df after df has been filled).
df=merged_df

In [None]:
import math  # not used in this cell; left in place in case later cells expect it

# Impute missing history_freq_band values with the most frequent band.
# One column-level fillna replaces the row loop that used chained indexing
# (df[col][i] = ...): that form raises SettingWithCopyWarning, is not
# guaranteed to write through to df, and assumes a 0..n-1 integer index.
max_mode = df['history_freq_band'].mode()
if not max_mode.empty:  # mode() is empty when the column has no non-null values
    df['history_freq_band'] = df['history_freq_band'].fillna(max_mode[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [None]:
import math  # not used in this cell; left in place in case later cells expect it

# Impute missing freq_band values with the most frequent band.
# One column-level fillna replaces the row loop that used chained indexing
# (df[col][i] = ...): that form raises SettingWithCopyWarning, is not
# guaranteed to write through to df, and assumes a 0..n-1 integer index.
max_mode = df['freq_band'].mode()
if not max_mode.empty:  # mode() is empty when the column has no non-null values
    df['freq_band'] = df['freq_band'].fillna(max_mode[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [None]:
# Re-check which columns still contain missing values after the imputation above.
merged_df.columns[merged_df.isna().any()]

Index(['1-day-predict'], dtype='object')

In [None]:
# Columns cast to float so they share one numeric dtype.
float_columns = [
    'wind_speed_day5',
    'humidity_min_day5',
    'humidity_max_day5',
    'capacity',
    'history_capacity',
    'wind_dir_day5',
]
for column in float_columns:
    df[column] = df[column].astype('float')

# Columns replaced by integer codes. pd.factorize numbers the distinct values
# in order of first appearance and encodes missing values as -1.
# Each column is encoded exactly once: the original cell factorized
# history_card_type twice, which would re-number a -1 (missing) code.
# NOTE(review): a column and its history_ counterpart are encoded independently,
# so equal raw values get equal codes only if they first appear in the same
# order in both columns -- confirm this is acceptable for the model.
categorical_columns = [
    'freq_band',
    'history_freq_band',
    'weather_day5',
    'nearest_station',
    'type',
    'history_type',
    'tip',
    'history_tip',
    'mlid',
    'history_mlid',
    'mw_connection_no',
    'history_mw_connection_no',
    'site_id',
    'history_site_id',
    'card_type',
    'history_card_type',
    'history_adaptive_modulation',
    'adaptive_modulation',
    'history_clutter_class',
    'modulation',
    'history_modulation',
]
for column in categorical_columns:
    df[column] = pd.factorize(df[column])[0]

In [None]:
# Add a constant month feature: every row gets month = 1.
# NOTE(review): the rows displayed in this notebook are dated 2021-02-13
# (February), so a hard-coded 1 may be unintended -- confirm what the training
# pipeline expects before switching to df['datetime'].dt.month.
df['month']=1
df

Unnamed: 0,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_scalibility_score,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_5-day-predict,history_groundheight,history_clutter_class,month
0,0,2021-02-13,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,0.0,456.0,0,False,False,False,107.9658,0,1
1,0,2021-02-13,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.7,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.4,0.0,456.0,0,False,False,False,107.9658,0,1
2,0,2021-02-13,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.7,406.0,1,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.4,0.0,406.0,1,False,False,False,107.9658,0,1
3,1,2021-02-13,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,0.0,247.0,2,False,False,False,107.9658,0,1
4,1,2021-02-13,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,0.0,247.0,2,False,False,False,107.9658,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,0,2021-02-13,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,95.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,0.0,95.0,1,False,False,False,17.0185,6,1
1816,0,2021-02-13,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.5,0.0,406.0,1,False,False,False,17.0185,6,1
1817,0,2021-02-13,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,5,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.5,0.0,406.0,5,False,False,False,8.9776,6,1
1818,0,2021-02-13,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.5,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.8,0.0,406.0,1,False,False,False,8.9776,6,1


In [None]:
# Remove the history-side scalibility_score column (its current-day counterpart
# was dropped from merged_df earlier). drop() returns a new frame, so from here
# on df no longer refers to the same object as merged_df.
df=df.drop(columns=['history_scalibility_score'])

In [None]:
# Sanity check that df is still a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [None]:
# reset_index() without drop=True keeps the old index as a new 'index' column.
# NOTE(review): this adds a row-number column to the saved feature set --
# confirm it is wanted; otherwise reset_index(drop=True) avoids it.
df=df.reset_index()

In [None]:
# Display the frame after the index reset (note the extra 'index' column).
df

Unnamed: 0,index,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,capacity,modulation,rlf,1-day-predict,5-day-predict,nearest_station,forecast_datetime,temp_max_day5,temp_min_day5,humidity_max_day5,humidity_min_day5,wind_dir_day5,wind_speed_day5,weather_day5,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_5-day-predict,history_groundheight,history_clutter_class,month
0,0,0,2021-02-13,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,456.0,0,False,False,False,107.9658,0,1
1,1,0,2021-02-13,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.7,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.4,456.0,0,False,False,False,107.9658,0,1
2,2,0,2021-02-13,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.7,406.0,1,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.4,406.0,1,False,False,False,107.9658,0,1
3,3,1,2021-02-13,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,247.0,2,False,False,False,107.9658,0,1
4,4,1,2021-02-13,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,247.0,2,False,False,False,107.9658,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,1815,0,2021-02-13,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,95.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,95.0,1,False,False,False,17.0185,6,1
1816,1816,0,2021-02-13,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.5,406.0,1,False,False,False,17.0185,6,1
1817,1817,0,2021-02-13,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,5,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.5,406.0,5,False,False,False,8.9776,6,1
1818,1818,0,2021-02-13,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.5,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.8,406.0,1,False,False,False,8.9776,6,1


In [None]:
# Rename the day-5 forecast columns (temp_max_day5, weather_day5, ...) to the
# *_day1 names. Only a trailing "_day5" is rewritten: the previous blanket
# replace of every "5" also turned '5-day-predict' and 'history_5-day-predict'
# into second copies of '1-day-predict' / 'history_1-day-predict', leaving the
# frame with duplicate column labels.
# NOTE(review): if later code reads the duplicated name (e.g. '1-day-predict.1'
# after a CSV round-trip), update it to '5-day-predict'.
df.columns = df.columns.str.replace(r"_day5$", "_day1", regex=True)

In [None]:
# Display the frame to check the renamed column labels.
df

Unnamed: 0,index,type,datetime,tip,mlid,mw_connection_no,site_id,card_type,adaptive_modulation,freq_band,severaly_error_second,error_second,unavail_second,avail_time,bbe,rxlevmax,capacity,modulation,rlf,1-day-predict,1-day-predict.1,nearest_station,forecast_datetime,temp_max_day1,temp_min_day1,humidity_max_day1,humidity_min_day1,wind_dir_day1,wind_speed_day1,weather_day1,history_type,history_datetime,history_tip,history_mlid,history_mw_connection_no,history_site_id,history_card_type,history_adaptive_modulation,history_freq_band,history_severaly_error_second,history_error_second,history_unavail_second,history_avail_time,history_bbe,history_rxlevmax,history_capacity,history_modulation,history_rlf,history_1-day-predict,history_1-day-predict.1,history_groundheight,history_clutter_class,month
0,0,0,2021-02-13,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,0,0,0,0,0,0,0,0,0.0,86400,0.0,-31.0,456.0,0,False,False,False,107.9658,0,1
1,1,0,2021-02-13,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.7,456.0,0,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,1,0,0,0,0,0,0,0,0.0,86400,0.0,-30.4,456.0,0,False,False,False,107.9658,0,1
2,2,0,2021-02-13,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.7,406.0,1,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,0,2021-02-12,0,2,1,0,1,0,0,0,0,0.0,86400,0.0,-34.4,406.0,1,False,False,False,107.9658,0,1
3,3,1,2021-02-13,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,3,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,247.0,2,False,False,False,107.9658,0,1
4,4,1,2021-02-13,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-99.0,247.0,2,False,,False,0,2021-02-12,3,1,61.0,53.0,29.0,20.0,0,1,2021-02-12,0,4,2,0,2,0,1,0,0,86400.0,86400,0.0,-98.7,247.0,2,False,False,False,107.9658,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,1815,0,2021-02-13,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,95.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1815,911,934,1,0,0,0,0,0.0,86400,0.0,-39.8,95.0,1,False,False,False,17.0185,6,1
1816,1816,0,2021-02-13,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1816,918,934,1,0,2,0,0,0.0,86400,0.0,-39.5,406.0,1,False,False,False,17.0185,6,1
1817,1817,0,2021-02-13,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.8,406.0,5,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1817,907,935,1,0,2,0,0,0.0,86400,0.0,-39.5,406.0,5,False,False,False,8.9776,6,1
1818,1818,0,2021-02-13,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.5,406.0,1,False,,False,8,2021-02-12,7,1,63.0,52.0,39.0,24.0,0,0,2021-02-12,0,1818,901,935,0,0,0,0,0,0.0,86400,0.0,-39.8,406.0,1,False,False,False,8.9776,6,1


In [None]:
# Persist the preprocessed validation frame to CSV.
# to_csv writes the DataFrame index as an unnamed first column by default.
# NOTE(review): hard-coded absolute path; presumably requires Google Drive to be
# mounted at /content/drive (Colab) -- confirm.
df.to_csv('/content/drive/MyDrive/FYP/val_preprocessed_data.csv')