In [1]:
# Importing Required Libraries
import pandas as pd
import os
import sqlite3
import mysql.connector

In [2]:
# Function to convert date format
def convert_date_format(date_series):
    """Re-render day-first date strings as year-first date strings.

    Args:
        date_series: pandas Series of dates written as ``dd/mm/YYYY``.

    Returns:
        A Series of strings formatted as ``YYYY/mm/dd``.

    Raises:
        ValueError: if a value does not match the ``dd/mm/YYYY`` format.
    """
    parsed_dates = pd.to_datetime(date_series, format='%d/%m/%Y')
    year_first = parsed_dates.dt.strftime('%Y/%m/%d')
    return year_first

In [15]:
# Data path
file_path = r'C:\Users\mh30f\OneDrive\Documents\GitHub\YDOC2MySQL\Resources'

# Initialize a list to store all dataframes
all_dataframes = []

# Iterate over all CSV files in the specified directory.
# sorted() makes the processing order (and therefore the row order of the
# combined frame) deterministic; os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(file_path)):
    if filename.endswith('.csv'):
        file_path_full = os.path.join(file_path, filename)

        # Read CSV without a header
        db_name = pd.read_csv(file_path_full, header=None)

        # Use the third row (index 2) as the header. Going through .tolist()
        # keeps the labels but stops the row label "2" from becoming the
        # name of the column index.
        db_name.columns = db_name.iloc[2].tolist()

        # Drop the first three rows, as they are not data anymore
        db_name = db_name.drop([0, 1, 2]).reset_index(drop=True)

        # Normalise 'DATE' when it is written as %d/%m/%Y.
        # convert_date_format() parses strictly and raises ValueError for any
        # other format, so no separate trial parse is needed.
        try:
            db_name['DATE'] = convert_date_format(db_name['DATE'])
        except ValueError:
            # Not %d/%m/%Y: leave the column as-is for the flexible parser below
            pass

        # Merge 'DATE' and 'TIME' columns into a new column 'DateTime' using a flexible parser;
        # values that cannot be parsed become NaT (errors='coerce')
        db_name['DateTime'] = pd.to_datetime(db_name['DATE'] + ' ' + db_name['TIME'], errors='coerce')

        # Drop the original 'DATE' and 'TIME' columns, which are now redundant
        db_name = db_name.drop(columns=['DATE', 'TIME'])

        # Reorder columns to move 'DateTime' to the first position
        db_name = db_name[['DateTime'] + [col for col in db_name.columns if col != 'DateTime']]

        # Store the processed DataFrame in the list
        all_dataframes.append(db_name)

# pd.concat() fails with an unhelpful message on an empty list, so report the
# actual problem (nothing to load) instead
if not all_dataframes:
    raise ValueError(f"No CSV files found in {file_path}")

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(all_dataframes, ignore_index=True)

# Drop columns with all NaN values
all_data = all_data.dropna(axis=1, how='all')

# Convert all columns to numeric types except 'DateTime'.
# The column is selected by name rather than by position so the conversion
# never touches the timestamps, whatever the column order is.
for col in [name for name in all_data.columns if name != 'DateTime']:
    all_data[col] = pd.to_numeric(all_data[col], errors='coerce')

# Show the final DataFrame
all_data

2,DateTime,P1,P2,P3,P4,P5,P6,P7,P01,P02,P03,P04,P05,P06,P07
0,2024-09-25 00:15:00,13.599,31414.99,31395.360,28.673,3.318,10.281,20.000,,,,,,,
1,2024-09-25 00:30:00,13.600,30987.75,30964.870,28.906,3.321,10.279,20.000,,,,,,,
2,2024-09-25 00:45:00,13.600,30351.10,30328.690,28.906,3.322,10.279,19.900,,,,,,,
3,2024-09-25 01:00:00,13.600,30819.11,30796.350,28.906,3.323,10.277,19.800,,,,,,,
4,2024-09-25 01:15:00,13.599,30680.76,30661.590,28.673,3.323,10.277,19.600,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,2024-09-25 05:00:00,14.838,17.15,10.397,4.441,9.700,3.300,0.196,,,,,,,
60,2024-09-25 05:15:00,14.841,17.15,10.397,4.444,9.400,3.300,0.201,,,,,,,
61,2024-09-25 05:30:00,14.841,17.15,10.401,4.440,9.000,3.300,0.201,,,,,,,
62,2024-09-25 05:45:00,14.844,17.15,10.400,4.444,8.600,3.300,0.200,,,,,,,


In [16]:
# Export to csv
output_file_path = r'C:\Users\mh30f\OneDrive\Documents\GitHub\YDOC2MySQL\output\all_data.csv'

# to_csv() does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there)
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
all_data.to_csv(output_file_path, index=False)

In [None]:
# Location of the SQLite database file
db_path = r'C:\Users\mh30f\OneDrive\Documents\GitHub\YDOC2MySQL\output\Ydocdatabase.db'

# Establish a connection to the database
conn = sqlite3.connect(db_path)

try:
    # Write the data from the DataFrame to the YDOC table.
    # if_exists='append' adds to an existing table, so running this cell
    # again inserts the same rows a second time.
    all_data.to_sql('YDOC', conn, if_exists='append', index=False)

    # Commit the changes
    conn.commit()
finally:
    # Close the connection even if the write failed, so the database file
    # handle is not left open in the kernel
    conn.close()

In [None]:
import mysql.connector
import getpass

# MySQL database configuration.
# Settings are read from environment variables so that no secret is stored in
# the notebook; the password is asked for interactively when MYSQL_PASSWORD
# is not set. (A password that was previously committed should be rotated.)
MYSQL_HOST = os.environ.get("MYSQL_HOST", "localhost")
MYSQL_USER = os.environ.get("MYSQL_USER", "mh30f")
MYSQL_PASSWORD = os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password: ")
MYSQL_DATABASE = os.environ.get("MYSQL_DATABASE", "YDOCDatabase")


def _to_sql_value(value):
    """Convert one DataFrame cell into a value that can be bound to a %s placeholder.

    Missing values (NaN/NaT/None) become None so they are stored as SQL NULL,
    and pandas Timestamps become plain datetime objects.
    """
    if pd.isna(value):
        return None
    if isinstance(value, pd.Timestamp):
        return value.to_pydatetime()
    return value


# Prepare the rows from a copy so that all_data itself is left untouched and
# this cell gives the same result every time it is run.
mysql_frame = all_data.copy()

# Replace spaces with underscores in column names
mysql_frame.columns = mysql_frame.columns.str.replace(' ', '_', regex=False)

# Dynamically generate column names and placeholders for SQL
columns = list(mysql_frame.columns)
placeholders = ', '.join(['%s'] * len(columns))  # Generates the required number of %s placeholders

# SQL query with dynamic columns, using backticks for column names.
# NOTE(review): the INSERT uses the DataFrame's own column names, so they must
# match the columns of the YDOC table created below. The frame displayed
# earlier in this notebook has DateTime / P1.. / P01.. columns, which that
# table does not define - MySQL will reject the insert until the columns are
# renamed or the schema is changed. TODO confirm the intended mapping.
sql = f"INSERT INTO YDOC (`{'`, `'.join(columns)}`) VALUES ({placeholders})"

# Rows with missing values are kept (every non-key column of the table allows
# NULL); dropping them would discard any reading that lacks even one field.
rows = [
    tuple(_to_sql_value(value) for value in record)
    for record in mysql_frame.itertuples(index=False, name=None)
]

# Identifiers cannot be passed as query parameters, so quote the name instead
quoted_database = "`" + MYSQL_DATABASE.replace("`", "``") + "`"

connection = None
cursor = None

try:
    # Connect to MySQL without specifying a database first
    connection = mysql.connector.connect(
        host=MYSQL_HOST,
        user=MYSQL_USER,
        password=MYSQL_PASSWORD
    )
    cursor = connection.cursor()

    # Create the database if it doesn't exist
    cursor.execute(f"CREATE DATABASE IF NOT EXISTS {quoted_database}")

    # Select the database
    cursor.execute(f"USE {quoted_database}")

    # Create the YDOC table if it doesn't exist
    create_table_query = """
    CREATE TABLE IF NOT EXISTS YDOC (
        `Timestamp` DATETIME,
        `ABS_Pressure` FLOAT,
        `Conductivity_Linear` FLOAT,
        `Conductivity_NFL` FLOAT,
        `Water_Temp` FLOAT,
        `Compensated_Pressure` FLOAT,
        `Barometric_Pressure` FLOAT,
        `Casing_Temperature` FLOAT,
        `EXT_BATT` FLOAT DEFAULT NULL,
        `INT_BATT` FLOAT DEFAULT NULL,
        `Min_Voltage` FLOAT DEFAULT NULL,
        `Max_Current` FLOAT DEFAULT NULL,
        `Signal` FLOAT DEFAULT NULL,
        `Signal_Strength` FLOAT DEFAULT NULL,
        `Access_Technology` VARCHAR(50) DEFAULT NULL
    )
    """
    cursor.execute(create_table_query)

    # Insert all rows in one call instead of one execute() per row
    if rows:
        cursor.executemany(sql, rows)

    # Commit the changes
    connection.commit()


except mysql.connector.Error as e:
    # Nothing is committed on failure; the connection is closed below
    print(f"An error occurred: {e}")

finally:
    # Close the cursor and connection
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()