In [None]:
%%bash
# Get data from https://drive.google.com/file/d/1HuxIvMWF-0xbgza1Y_o5vYVwtpv-fekZ/view?usp=sharing

# combine all data to a single file 
cat Data/*.txt > Data/consolidated_data.csv

In [19]:
import pandas as pd 
raw = pd.read_csv('Data/consolidated_data.csv')
raw.columns = ['time','lat','long','mobile','wifi']
raw = raw.drop_duplicates().reset_index(drop=True)
raw

Unnamed: 0,time,lat,long,mobile,wifi
0,2024-04-11 16:17:30.859030,40.112640,-88.218734,-91,-50.0
1,2024-04-11 16:17:40.476643,40.112646,-88.218679,-91,-50.0
2,2024-04-11 16:18:12.819412,40.112550,-88.218558,-98,-55.0
3,2024-04-11 16:18:12.828600,40.112550,-88.218558,-98,-55.0
4,2024-04-11 16:18:12.835247,40.112550,-88.218558,-88,-50.0
...,...,...,...,...,...
7202,2024-04-26 21:06:01.819983,40.108236,-88.235220,-76,-127.0
7203,2024-04-26 21:06:01.839555,40.108236,-88.235220,-76,-127.0
7204,2024-04-26 21:06:01.852089,40.108236,-88.235220,-76,-127.0
7205,2024-04-26 21:06:01.895872,40.108236,-88.235220,-79,-127.0


In [20]:
# cleanup wifi data
raw.wifi = raw.wifi.fillna(value=-127)

In [21]:
# cleanup mobile data
raw.mobile = raw.mobile.fillna(value=-127)

In [22]:
# positive offsetting for ease of data manipulation
raw.wifi = raw.wifi.add(127)
raw.mobile = raw.mobile.add(127)

from datetime import datetime

# convert date string to ts
def date_string_to_epoch(date_string):
    dt_object = datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S.%f')
    epoch_timestamp = int(dt_object.timestamp())
    return epoch_timestamp

raw.time = raw.time.apply(date_string_to_epoch)
raw



Unnamed: 0,time,lat,long,mobile,wifi
0,1712870250,40.112640,-88.218734,36,77.0
1,1712870260,40.112646,-88.218679,36,77.0
2,1712870292,40.112550,-88.218558,29,72.0
3,1712870292,40.112550,-88.218558,29,72.0
4,1712870292,40.112550,-88.218558,39,77.0
...,...,...,...,...,...
7202,1714183561,40.108236,-88.235220,51,0.0
7203,1714183561,40.108236,-88.235220,51,0.0
7204,1714183561,40.108236,-88.235220,51,0.0
7205,1714183561,40.108236,-88.235220,48,0.0


In [23]:
# splitting into wifi and mobile data
wifi = raw[raw.wifi > 0].reset_index(drop=True).rename(columns={'wifi':'wifi_strength'})
wifi = wifi.drop(columns='mobile')
mobile = raw[raw.mobile > 0].reset_index(drop=True).rename(columns={'mobile':'mobile_strength'})
mobile = mobile.drop(columns='wifi')

In [24]:
wifi

Unnamed: 0,time,lat,long,wifi_strength
0,1712870250,40.112640,-88.218734,77.0
1,1712870260,40.112646,-88.218679,77.0
2,1712870292,40.112550,-88.218558,72.0
3,1712870292,40.112550,-88.218558,72.0
4,1712870292,40.112550,-88.218558,77.0
...,...,...,...,...
1651,1714173482,40.103849,-88.234718,61.0
1652,1714173483,40.103849,-88.234718,61.0
1653,1714173483,40.103842,-88.234716,61.0
1654,1714173484,40.103833,-88.234715,62.0


In [25]:
mobile

Unnamed: 0,time,lat,long,mobile_strength
0,1712870250,40.112640,-88.218734,36
1,1712870260,40.112646,-88.218679,36
2,1712870292,40.112550,-88.218558,29
3,1712870292,40.112550,-88.218558,29
4,1712870292,40.112550,-88.218558,39
...,...,...,...,...
7202,1714183561,40.108236,-88.235220,51
7203,1714183561,40.108236,-88.235220,51
7204,1714183561,40.108236,-88.235220,51
7205,1714183561,40.108236,-88.235220,48


In [26]:
mobile.to_csv('Data/mobile_cleaned.csv',index=False)
wifi.to_csv('Data/wifi_cleaned.csv',index=False)