In [2]:
# Import necessary libraries
import pandas as pd

# Function to read a Feather file
def read_feather_file(file_path):
    """Load a Feather file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the Feather file, passed straight to ``pd.read_feather``.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or ``None`` if reading raised any exception.
        Failures are printed rather than re-raised, so callers must check
        for ``None`` before using the result.
    """
    frame = None
    try:
        frame = pd.read_feather(file_path)
    except Exception as e:
        # Best-effort load: report the problem and fall through to return None.
        print(f"An error occurred: {e}")
    return frame

# Absolute locations of the raw Feather inputs on the local D: drive
standard_profiles_file = r'D:\raw\standard_profiles_DCW.feather'
customer_attributes_file = r'D:\raw\customer_attributes_DCW.feather'
telemetry_file = r'D:\raw\telemetry_large_consumers_DCW.feather'

# Load all three inputs in one pass; read_feather_file prints the error and
# yields None for any file that cannot be read.
standard_profiles_df, customer_attributes_df, telemetry_df = (
    read_feather_file(feather_path)
    for feather_path in (
        standard_profiles_file,
        customer_attributes_file,
        telemetry_file,
    )
)

In [3]:
# Compare the dtype of the RND_ID key column in both frames
print("Telemetry DataFrame ID column type:", telemetry_df.dtypes['RND_ID'])
print("Customer Attributes DataFrame ID column type:", customer_attributes_df.dtypes['RND_ID'])

Telemetry DataFrame ID column type: object
Customer Attributes DataFrame ID column type: int32


In [4]:
# Cast the RND_ID key to str in both frames so they share one dtype
# (the earlier dtype check printed object for telemetry and int32 for
# customer attributes). Both frames are modified in place.
telemetry_df['RND_ID'] = telemetry_df['RND_ID'].astype(str)
customer_attributes_df['RND_ID'] = customer_attributes_df['RND_ID'].astype(str)

In [5]:
# Inner-join telemetry readings with customer attributes on the shared RND_ID
# key; rows whose RND_ID is absent from either frame are dropped.
# NOTE(review): the AC4B preview further down shows RND_ID 4080 on two rows
# with matching values — presumably a duplicated key in one input; verify.
merged_data = telemetry_df.merge(customer_attributes_df, on='RND_ID', how='inner')

In [5]:
# Preview the first five rows of the merged frame
merged_data.head(n=5)

Unnamed: 0,RND_ID,2023-01-01 00:00,2023-01-01 00:15,2023-01-01 00:30,2023-01-01 00:45,2023-01-01 01:00,2023-01-01 01:15,2023-01-01 01:30,2023-01-01 01:45,2023-01-01 02:00,...,2023-12-31 22:00,2023-12-31 22:15,2023-12-31 22:30,2023-12-31 22:45,2023-12-31 23:00,2023-12-31 23:15,2023-12-31 23:30,2023-12-31 23:45,BASELOAD_PROFILE,AANSLUITCATEGORIE
0,8423,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010,AC4A
1,6756,36.0,32.0,32.0,36.0,32.0,36.0,32.0,36.0,32.0,...,34.09,33.31,30.57,29.65,32.06,29.13,28.25,27.85,E3B,AC4B
2,1077,0.0,4.0,0.0,4.0,0.0,4.0,0.0,4.0,0.0,...,1.5,2.0,1.5,1.5,2.0,1.5,2.0,1.5,008,AC4A
3,8061,16.0,8.0,12.0,8.0,12.0,16.0,12.0,12.0,12.0,...,8.8,11.0,10.19,9.19,8.8,9.0,11.0,12.4,001,AC4B
4,10575,0.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,...,3.54,1.15,1.1,1.1,1.1,1.1,1.1,1.12,008,AC4B


In [6]:
import pandas as pd

# Distinct values of the AANSLUITCATEGORIE column in the merged frame,
# in order of first appearance
unique_categories = pd.unique(merged_data['AANSLUITCATEGORIE'])
print("Unique Categories:", unique_categories)


Unique Categories: ['AC4A' 'AC4B']


In [7]:
# Short alias for the merged frame; both names refer to the same object (no copy)
df=merged_data

In [8]:
# Split the frame into one sub-frame per AANSLUITCATEGORIE value,
# keyed by the category label
category_dfs = {
    category: df.loc[df['AANSLUITCATEGORIE'].eq(category)]
    for category in unique_categories
}

# Pull out the AC4A subset and preview its first rows
category_ac4a = category_dfs['AC4A']

print(category_ac4a.head())


   RND_ID  2023-01-01 00:00  2023-01-01 00:15  2023-01-01 00:30  \
0    8423               0.0               0.0               0.0   
2    1077               0.0               4.0               0.0   
5    4797               0.0               4.0               0.0   
6    8484               0.0               4.0               0.0   
13   7276              20.0              20.0              16.0   

    2023-01-01 00:45  2023-01-01 01:00  2023-01-01 01:15  2023-01-01 01:30  \
0                0.0               4.0               0.0               0.0   
2                4.0               0.0               4.0               0.0   
5                0.0               4.0               0.0               0.0   
6                0.0               0.0               4.0               0.0   
13              20.0              20.0              20.0              16.0   

    2023-01-01 01:45  2023-01-01 02:00  ...  2023-12-31 22:00  \
0                0.0               4.0  ...              0.00  

In [13]:
# Save each category DataFrame to its own CSV file.
# The category label is embedded in the file name: writing every frame to one
# fixed path makes each iteration overwrite the previous one, leaving only the
# last category on disk.
# NOTE(review): if a single-category g_data.csv was intended instead, write
# that one frame directly (e.g. category_dfs['AC4A']) — confirm with the author.
for category, category_df in category_dfs.items():
    category_df.to_csv(rf'C:\Users\20235624\Documents\raw\raw\g_data_{category}.csv')
   



In [9]:
# Pull out the AC4B subset and preview its first rows
category_ac4b = category_dfs['AC4B']

print(category_ac4b.head(n=5))

  RND_ID  2023-01-01 00:00  2023-01-01 00:15  2023-01-01 00:30  \
1   6756              36.0              32.0              32.0   
3   8061              16.0               8.0              12.0   
4  10575               0.0               4.0               0.0   
7   4080              16.0              20.0              24.0   
8   4080              16.0              20.0              24.0   

   2023-01-01 00:45  2023-01-01 01:00  2023-01-01 01:15  2023-01-01 01:30  \
1              36.0              32.0              36.0              32.0   
3               8.0              12.0              16.0              12.0   
4               0.0               4.0               0.0               0.0   
7              20.0              20.0              16.0              16.0   
8              20.0              20.0              16.0              16.0   

   2023-01-01 01:45  2023-01-01 02:00  ...  2023-12-31 22:00  \
1              36.0              32.0  ...             34.09   
3           

In [10]:
# Save each category DataFrame to its own CSV file.
# The category label is embedded in the file name: writing every frame to one
# fixed path makes each iteration overwrite the previous one, leaving only the
# last category on disk.
# NOTE(review): if a single-category d_data.csv was intended instead, write
# that one frame directly (e.g. category_dfs['AC4B']) — confirm with the author.
for category, category_df in category_dfs.items():
    category_df.to_csv(rf'C:\Users\20235624\Documents\raw\raw\d_data_{category}.csv')