## Import Statements

In [1]:
import pandas as pd
import pytz
import warnings

from modules import schemas, College1AcPtTimeSeries
from modules.dataframe_manip import get_building_accesspoints,csv_to_timeseries_df

## Local Variables and Files

In [2]:
# Input CSV handed to get_hashed_data.
filename = "wifi_data_until_20190204.csv"

# Official building names, grouped by first character for readability.
# get_hashed_data uses a building's position in this list as its numeric
# code, so the order must stay exactly as is. The literal string "None" is
# deliberately kept as the final entry.
all_buildings = [
    "118-8TH", "1567TH", "345C",
    "ALEXANDER", "ANDREW",
    "BALDWIN", "BRACKETT", "BRIDGES",
    "CARNEGIE", "CLARK", "CROOKSHANK",
    "DRAPER",
    "FARM", "FRANK", "FRARY",
    "GIBONEY", "GIBSON", "GROUNDS",
    "HAHN", "HALDEMAN", "HARWOOD",
    "ITB",
    "KENYON",
    "LAWRY", "LEB", "LEBUS",
    "MASON", "MCCARTHY", "MERRIT", "MILLIKAN", "MUSEUM",
    "NORTON",
    "OLDENBORG",
    "PAULEY", "PEARSON", "PENDLETON", "POMONA",
    "RAINS", "REMBRANDT",
    "SCC", "SEAVER", "SGM", "SMILEY", "SMITH", "SONTAG", "STUDIOART", "SUMNER",
    "THATCHER",
    "WALKER", "WALTON", "WIG",
    "None",
]

## Functions
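* `get_hashed_data` - takes a CSV file and the list of all official building names, replaces every access-point column name with an anonymised `COL-BUILDINGnn-APnnn` label (prefixed with `UKN` when no building could be matched), and writes the result to `hashed_data.csv` for publication.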

In [3]:
def get_hashed_data(filename, all_buildings, output_path='hashed_data.csv'):
    """Replace access-point column names with anonymised labels and save a CSV.

    The frame returned by ``csv_to_timeseries_df(filename)`` is expected to
    have one column per access point (AP). Each column is renamed to
    ``COL-BUILDING<code>-AP<n>`` where ``<n>`` is the column's zero-based
    position in the frame and ``<code>`` is the position of the AP's building
    in ``all_buildings``. Despite the function name no cryptographic hash is
    involved; the labels are plain sequential codes.

    An AP is treated as "unknown" when the schema cannot resolve a building
    for it, or when the resolved building is not listed in ``all_buildings``.
    Every unknown AP gets a fresh building code (starting at
    ``len(all_buildings)``), a ``UKN`` prefix on its label, and a warning.

    Parameters
    ----------
    filename : str
        Path of the input CSV, passed straight to ``csv_to_timeseries_df``.
    all_buildings : list of str
        Official building names; a name's first position in the list is its
        building code.
    output_path : str, optional
        Where the renamed frame is written. Defaults to ``'hashed_data.csv'``.

    Returns
    -------
    None
        The result is written to ``output_path``; row index and cell values
        are left untouched.
    """
    schema: schemas.AcPtTimeSeries = College1AcPtTimeSeries.College1AcPtTimeSeries

    # Read csv file
    input_df = csv_to_timeseries_df(filename)

    # Building name -> code. setdefault keeps the FIRST position of a repeated
    # name, which is what list.index() would have returned.
    building_codes = {}
    for position, building in enumerate(all_buildings):
        building_codes.setdefault(building, position)

    # Codes handed out to unknown APs continue after the official buildings.
    next_unknown_code = len(all_buildings)

    new_columns = []
    for ap_number, ap_name in enumerate(input_df.columns):
        # Presumably col_to_building(..., safe=True) returns None when nothing
        # matches (TODO confirm); str() turns that into the 'None' sentinel.
        bui_name = str(schema.col_to_building(ap_name, safe=True))

        # A building the schema knows but all_buildings does not list is also
        # unknown; previously that case crashed with ValueError.
        if bui_name == 'None' or bui_name not in building_codes:
            code = next_unknown_code
            next_unknown_code += 1
            warnings.warn("Warning: Did not find corresponding building name in ap " + str(ap_name)
                          + ". AP will have its own separate column and be marked with UKN")
            prefix = 'UKN'  # Marking buildings that were not found
        else:
            code = building_codes[bui_name]
            prefix = ''

        new_columns.append('{}COL-BUILDING{:02d}-AP{:03d}'.format(prefix, code, ap_number))

    # Only the column labels change; the index is left as it was read.
    input_df.columns = new_columns

    print('Hashing now please wait...')
    input_df.to_csv(output_path, header=True)
    print('Function finished! Check if ' + str(output_path) + ' exists.')

## Code starts here

In [4]:
# Write the anonymised CSV, then load it back so the result can be inspected.
get_hashed_data(filename=filename, all_buildings=all_buildings)
check = pd.read_csv('hashed_data.csv')



Hashing now please wait...
Function finished! Check if hashed_data.csv exists.


In [5]:
# Show the frame re-read from hashed_data.csv to eyeball the new column labels.
check

Unnamed: 0,time,UKNCOL-BUILDING52-AP000,UKNCOL-BUILDING53-AP001,UKNCOL-BUILDING54-AP002,UKNCOL-BUILDING55-AP003,UKNCOL-BUILDING56-AP004,UKNCOL-BUILDING57-AP005,UKNCOL-BUILDING58-AP006,UKNCOL-BUILDING59-AP007,UKNCOL-BUILDING60-AP008,...,UKNCOL-BUILDING654-AP602,UKNCOL-BUILDING655-AP603,UKNCOL-BUILDING656-AP604,UKNCOL-BUILDING657-AP605,UKNCOL-BUILDING658-AP606,UKNCOL-BUILDING659-AP607,UKNCOL-BUILDING660-AP608,UKNCOL-BUILDING661-AP609,UKNCOL-BUILDING662-AP610,UKNCOL-BUILDING663-AP611
0,2018-07-18 23:10:14+00:00,5.0,,,,1.0,,1.0,,,...,,,,1.0,,15.0,8.0,5.0,,
1,2018-07-18 23:15:23+00:00,5.0,,,,1.0,,1.0,,,...,,,,1.0,,13.0,8.0,4.0,,
2,2018-07-18 23:20:38+00:00,5.0,,,,1.0,,1.0,,,...,,,,1.0,,14.0,7.0,4.0,,
3,2018-07-18 23:25:06+00:00,7.0,,,,1.0,,1.0,,,...,,,,1.0,,14.0,8.0,5.0,,
4,2018-07-18 23:30:06+00:00,8.0,,,,1.0,,1.0,,,...,,,,1.0,,14.0,6.0,4.0,,
5,2018-07-18 23:35:08+00:00,3.0,,,,1.0,,1.0,,,...,,,,1.0,,16.0,6.0,5.0,,
6,2018-07-18 23:40:12+00:00,2.0,,,,1.0,,1.0,,,...,,,,1.0,,17.0,6.0,5.0,,
7,2018-07-18 23:45:16+00:00,1.0,,,,1.0,,1.0,,,...,,,,1.0,,24.0,6.0,6.0,,
8,2018-07-18 23:50:19+00:00,1.0,,,,1.0,,2.0,,,...,,,,1.0,,23.0,6.0,5.0,,
9,2018-07-18 23:55:12+00:00,1.0,,,,1.0,,1.0,,,...,,,,1.0,,21.0,6.0,5.0,,
