# Intro to Data Management with Python
# Sketch an analysis for Solar Power Generation Dataset

Kritkorn Supyen (Data Engineering, Jacobs University Bremen)

# 1. Initialize Data

In [1]:
import mysql.connector
import datamanagement as dm
import re
import json

## 1.1 Connect to MySQL server

In [2]:
# Read the MySQL connection parameters from a local JSON file so the
# credentials are not written into the notebook itself.
# The `with` block closes the file even if json.load() raises.
with open('MySQLparameter.json') as parameters_file:
    parameters_value = json.load(parameters_file)

# The JSON keys are forwarded unchanged as keyword arguments to the connect helper,
# which hands back the connection and a cursor used by every later cell.
db_connection, mycursor = dm.connectServer(**parameters_value)

## 1.2 Create database

Create a database named PowerGeneration

In [3]:
# Name of the database that the following cells create and select
database_name = 'PowerGeneration'

# Ask the helper to create the database, then show the message it returns
create_status = dm.createDatabase(mycursor, database_name)
print(create_status)

Command excuted - CREATE DATABASE PowerGeneration;


## 1.3 Use database

Use the database PowerGeneration

In [4]:
# Switch the session to `database_name`, then show the message the helper returns
use_status = dm.useDatabase(mycursor, database_name)
print(use_status)

Command excuted - USE PowerGeneration


## 1.4 Create tables

Create four tables, one for each of the four CSV files:
- Plant_1_Generation_Data.csv
- Plant_1_Weather_Sensor_Data.csv
- Plant_2_Generation_Data.csv
- Plant_2_Weather_Sensor_Data.csv

In [5]:
# Create one table per CSV file, using each file's header row as the column list.
# NOTE(review): file_path is assigned here but not passed to dm.openFile, which
# receives only the bare file name — confirm how the helper resolves the location.
file_path = '/data/PowerGeneration/'
file_names = [
    'Plant_1_Generation_Data.csv',
    'Plant_1_Weather_Sensor_Data.csv',
    'Plant_2_Generation_Data.csv',
    'Plant_2_Weather_Sensor_Data.csv',
]
for file_name in file_names:
    # dm.openFile returns a handle together with an iterator over the CSV rows
    file, csv_data = dm.openFile(file_name, ',')
    try:
        # The first row of the CSV holds the column names
        column_names = next(csv_data)
    finally:
        # Only the header row is needed, so release the handle right away
        # instead of leaving one open file per CSV.
        # NOTE(review): assumes `file` is the open file object behind csv_data — confirm.
        file.close()

    # The table is named after the file, without its extension
    table_name = file_name.split('.')[0]

    # Create the table and show the statement the helper reports
    print(dm.createTable(mycursor, column_names, table_name))
    

Command excuted - CREATE TABLE Plant_1_Generation_Data (
        DATE_TIME TIMESTAMP,
        PLANT_ID VARCHAR(512),
        SOURCE_KEY VARCHAR(512),
        DC_POWER DOUBLE UNSIGNED,
        AC_POWER DOUBLE UNSIGNED,
        DAILY_YIELD DOUBLE UNSIGNED,
        TOTAL_YIELD DECIMAL(15,5) UNSIGNED   
        )
Command excuted - CREATE TABLE Plant_1_Weather_Sensor_Data (
        DATE_TIME TIMESTAMP,
        PLANT_ID VARCHAR(512),
        SOURCE_KEY VARCHAR(512),
        AMBIENT_TEMPERATURE DOUBLE,
        MODULE_TEMPERATURE DOUBLE,
        IRRADIATION DOUBLE  
        )
Command excuted - CREATE TABLE Plant_2_Generation_Data (
        DATE_TIME TIMESTAMP,
        PLANT_ID VARCHAR(512),
        SOURCE_KEY VARCHAR(512),
        DC_POWER DOUBLE UNSIGNED,
        AC_POWER DOUBLE UNSIGNED,
        DAILY_YIELD DOUBLE UNSIGNED,
        TOTAL_YIELD DECIMAL(15,5) UNSIGNED   
        )
Command excuted - CREATE TABLE Plant_2_Weather_Sensor_Data (
        DATE_TIME TIMESTAMP,
        PLANT_ID VARCHAR

## 1.5 Load data from Plant_1_Generation_Data.csv to the table

In [6]:
# Bulk-load Plant_1_Generation_Data.csv into the table of the same name.
file_path = '/data/PowerGeneration/Plant_1_Generation_Data.csv'
table_name = 'Plant_1_Generation_Data'

# Columns that follow DATE_TIME. The leading space and closing ')' are intentional:
# the helper places this text right after "(@DATE_TIME," in the generated statement.
column_names = ' PLANT_ID, SOURCE_KEY, DC_POWER, AC_POWER, DAILY_YIELD, TOTAL_YIELD)'

# Pattern handed to STR_TO_DATE for this file: day-month-year, hours and minutes only
datetime_format = '%d-%m-%Y %H:%i'

load_status = dm.importData(
    mycursor,
    file_path,
    table_name,
    column_names,
    datetime_format,
)
print(load_status)

Command excuted - LOAD DATA INFILE '/data/PowerGeneration/Plant_1_Generation_Data.csv' 
    INTO TABLE Plant_1_Generation_Data
    FIELDS TERMINATED BY ',' 
    ENCLOSED BY '"'
    LINES TERMINATED BY '
'
    IGNORE 1 ROWS
    (@DATE_TIME,  PLANT_ID, SOURCE_KEY, DC_POWER, AC_POWER, DAILY_YIELD, TOTAL_YIELD) 
    SET DATE_TIME = STR_TO_DATE(@DATE_TIME,'%d-%m-%Y %H:%i');
    


## 1.6 Load data from Plant_2_Generation_Data.csv to the table

In [7]:
# Bulk-load Plant_2_Generation_Data.csv into the table of the same name.
file_path = '/data/PowerGeneration/Plant_2_Generation_Data.csv'
table_name = 'Plant_2_Generation_Data'

# Columns that follow DATE_TIME. The leading space and closing ')' are intentional:
# the helper places this text right after "(@DATE_TIME," in the generated statement.
column_names = ' PLANT_ID, SOURCE_KEY, DC_POWER, AC_POWER, DAILY_YIELD, TOTAL_YIELD)'

# Pattern handed to STR_TO_DATE for this file: year-month-day with seconds
datetime_format = '%Y-%m-%d %H:%i:%s'

load_status = dm.importData(
    mycursor,
    file_path,
    table_name,
    column_names,
    datetime_format,
)
print(load_status)


Command excuted - LOAD DATA INFILE '/data/PowerGeneration/Plant_2_Generation_Data.csv' 
    INTO TABLE Plant_2_Generation_Data
    FIELDS TERMINATED BY ',' 
    ENCLOSED BY '"'
    LINES TERMINATED BY '
'
    IGNORE 1 ROWS
    (@DATE_TIME,  PLANT_ID, SOURCE_KEY, DC_POWER, AC_POWER, DAILY_YIELD, TOTAL_YIELD) 
    SET DATE_TIME = STR_TO_DATE(@DATE_TIME,'%Y-%m-%d %H:%i:%s');
    


## 1.7 Load data from Plant_1_Weather_Sensor_Data.csv to the table

In [8]:
# Bulk-load Plant_1_Weather_Sensor_Data.csv into the table of the same name.
file_path = '/data/PowerGeneration/Plant_1_Weather_Sensor_Data.csv'
table_name = 'Plant_1_Weather_Sensor_Data'

# Columns that follow DATE_TIME. The leading space and closing ')' are intentional:
# the helper places this text right after "(@DATE_TIME," in the generated statement.
column_names = ' PLANT_ID, SOURCE_KEY, AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, IRRADIATION)'

# Pattern handed to STR_TO_DATE for this file: year-month-day with seconds
datetime_format = '%Y-%m-%d %H:%i:%s'

load_status = dm.importData(
    mycursor,
    file_path,
    table_name,
    column_names,
    datetime_format,
)
print(load_status)

Command excuted - LOAD DATA INFILE '/data/PowerGeneration/Plant_1_Weather_Sensor_Data.csv' 
    INTO TABLE Plant_1_Weather_Sensor_Data
    FIELDS TERMINATED BY ',' 
    ENCLOSED BY '"'
    LINES TERMINATED BY '
'
    IGNORE 1 ROWS
    (@DATE_TIME,  PLANT_ID, SOURCE_KEY, AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, IRRADIATION) 
    SET DATE_TIME = STR_TO_DATE(@DATE_TIME,'%Y-%m-%d %H:%i:%s');
    


## 1.8 Load data from Plant_2_Weather_Sensor_Data.csv to the table

In [9]:
# Bulk-load Plant_2_Weather_Sensor_Data.csv into the table of the same name.
file_path = '/data/PowerGeneration/Plant_2_Weather_Sensor_Data.csv'
table_name = 'Plant_2_Weather_Sensor_Data'

# Columns that follow DATE_TIME. The leading space and closing ')' are intentional:
# the helper places this text right after "(@DATE_TIME," in the generated statement.
column_names = ' PLANT_ID, SOURCE_KEY, AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, IRRADIATION)'

# Pattern handed to STR_TO_DATE for this file: year-month-day with seconds
datetime_format = '%Y-%m-%d %H:%i:%s'

load_status = dm.importData(
    mycursor,
    file_path,
    table_name,
    column_names,
    datetime_format,
)
print(load_status)

Command excuted - LOAD DATA INFILE '/data/PowerGeneration/Plant_2_Weather_Sensor_Data.csv' 
    INTO TABLE Plant_2_Weather_Sensor_Data
    FIELDS TERMINATED BY ',' 
    ENCLOSED BY '"'
    LINES TERMINATED BY '
'
    IGNORE 1 ROWS
    (@DATE_TIME,  PLANT_ID, SOURCE_KEY, AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, IRRADIATION) 
    SET DATE_TIME = STR_TO_DATE(@DATE_TIME,'%Y-%m-%d %H:%i:%s');
    


## 1.9 Commit and close the MySQL cursor and connection.

In [10]:
# Commit the current transaction on the MySQL connection opened at the top of the notebook
db_connection.commit()

In [11]:
# Release the cursor first, then the connection it belongs to.
# The try/finally ensures the connection is closed even if closing the cursor raises.
try:
    mycursor.close()
finally:
    db_connection.close()