-
Notifications
You must be signed in to change notification settings - Fork 0
/
tableSorting.py
102 lines (78 loc) · 3.99 KB
/
tableSorting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Solar Irradiance Data Cleaning
# Built with Python 3.8
# Author: Nicholas Navarro
# First Revision: 16.5.22
import os
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
import glob
import secrets
# handles connection to database, replace IP and table name to whatever database
# this uses a postgres docker container that is running on the nerc-postgres raspberry pi - connected to the NERC network.
# IP address is raspi. ip on network.
engine = create_engine('postgresql+psycopg2://'+secrets.docker_user+':'+secrets.docker_password+'@172.24.59.97:5432/RadianceDB')
# file location of the CSV exported by EOX, checks directory for csv file. Change the directory to your liking
for file_name in glob.glob('./testData/'+'*.CSV'):
fileLocation = file_name
# If you want to record the cleaned data to a CSV, use panda to parse to desired output CSV File
outputFile = './output.csv'
# reads csv file from file location variable, change as necessary
df = pd.read_csv(fileLocation, sep='[,]', engine='python', header=None)
# removes top 7 rows of data, these include sensor temp. and model, which is not needed
topObj = df.iloc[:6]
topObjNew = topObj.iloc[:2]
# transposes the data to column format, if you need to export it
topObjTransposed = topObjNew.T
# Time recording section, isolates date and time, and transposes it horizontally
dateTime = topObjTransposed
dateTime.columns = dateTime.iloc[0]
dateTime.drop(index=dateTime.index[0], axis=0, inplace=True)
isolatedDateTime = dateTime.iloc[1]
print(isolatedDateTime)
# isolates the first row for date time, converts it into a 2d array
# converts 2d day to numpy array and flattens it to list.
timeStamp2DArray = isolatedDateTime.values.tolist()
timeStamp2DArray = np.array(timeStamp2DArray)
flatten = timeStamp2DArray.flatten()
print(flatten)
# converses the flatten list to datetime format
# timeStamp = pd.to_datetime(dateTimeTransposed['Date'] + ' ' + dateTimeTransposed['Time'], cache=True)
merged = ' '.join(flatten)
timeStampTest = pd.to_datetime(merged)
print(merged)# checking if date time conversion was done correctly
# END ----------------
# Data readings, wavelength and irradiance
bottomData = df.iloc[7:]
# Focus only on the first data points, at EOX updates every hour, meaning first data set in XX:00
hourData = bottomData.drop(columns=[2, 3, 4, 5, 6, 7])
# Resets headers of dataframe to 'wavelength and irradiance'
hourData.columns = hourData.iloc[0]
# Resets index so first data entry is '0' on index
hourData.drop(index=hourData.index[0], axis=0, inplace=True)
# before conversion takes place, original datatypes are held, not important, but good for error catching
beforeConversion = hourData.dtypes
# converts all data on irradiance column to float, important for comparison
hourData['Irradiance(W/m2/um)'] = hourData['Irradiance(W/m2/um)'].astype('float')
# -----------------------------------------
# This section of code focuses on cleaning of data, further development needed.
# iterates value per row on column irradiance to see if value is lower than 200 (Limit set by Hugo)
# hourData.drop(hourData[hourData['Irradiance(W/m2/um)'] < 200.0].index, inplace=True)
# ------------------------------------------
# resets the index again so that the first data point is 0 on index
hourData = hourData.reset_index(drop=True)
# inserts NaT fields to the converted datetime variables
hourData.insert(1, 'Timestamp', 'x')
hourData['Timestamp'] = merged
hourData.replace({pd.NaT: str(merged)}, inplace=True)
# -------------------------------
# Takes only first 3 columns
finalData = hourData[hourData.columns[0:3]]
print(finalData)
# sends data to csv, for testing purposes - uncomment to run, will need 'output.csv' file in the same directory
# finalData.to_csv(r'./output.csv', index=False)
# sends data to sql using sql alchemy engine, and replaces the data on database.
finalData.to_sql('radiance', engine, if_exists='append', index=False)
print('\nData succesfully transfered to Postgres\n')
os.remove(fileLocation) # deletes CSV after use
print('File Deleted.')