In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the dataset
file_path = 'combined_data3.csv'  # Update this path as needed
data = pd.read_csv(file_path)

# Replace missing values with 0.0. Plain assignment is used instead of
# inplace=True so that re-running this step always starts from a fresh frame.
# NOTE(review): this also writes the float 0.0 into text columns that contain
# NaN, which would mix strings and numbers in a categorical column -- confirm
# that the categorical columns of this dataset have no missing values.
data = data.fillna(0.0)

# Define the target variable and features
X = data.drop('Uplink_thrpt', axis=1)
y = data['Uplink_thrpt']

# Identify categorical (object dtype) and numerical (everything else) columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(exclude=['object']).columns

# Create a preprocessor: scale numeric columns, one-hot encode categorical ones.
# handle_unknown='ignore' is required because the encoder is fitted on the
# training split only, while predictions below are made for the whole dataset:
# a category that occurs only in the held-out rows would otherwise raise a
# ValueError at predict time. Unknown categories are encoded as all zeros.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Define the model pipeline (preprocessing followed by the regressor)
pipeline = make_pipeline(
    preprocessor,
    DecisionTreeRegressor(random_state=42)
)

# Split the data into training (80%) and testing (20%) sets; seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model on the training split only
pipeline.fit(X_train, y_train)

# Predict on the entire dataset (training and held-out rows alike) and store
# the result as a new column next to the original measurements
data['Predicted_Uplink_thrpt'] = pipeline.predict(X)

# Function that keeps only the rows satisfying every condition in a list
def filter_with_conditions(data, conditions):
    """Return the rows of ``data`` that satisfy all of ``conditions``.

    Each condition is turned into a ``DataFrame.query`` expression and applied
    to the result of the previous one, so the conditions are combined with a
    logical AND. Every condition is applied exactly once.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter. It is copied first, so the caller's frame is never
        modified.
    conditions : iterable of dict
        Each dict must provide the keys ``'column'``, ``'operation'`` and
        ``'value'``. The operation ``'='`` means equality; any other operation
        string (e.g. ``'<'``, ``'>='``) is placed verbatim between the column
        and the value in the query expression.

    Returns
    -------
    pandas.DataFrame
        The filtered copy. With no conditions this is an unfiltered copy of
        ``data``.

    Raises
    ------
    KeyError
        If a condition dict lacks one of the three required keys.
    """
    filtered_data = data.copy()

    for condition in conditions:
        column = condition['column']
        operation = condition['operation']
        value = condition['value']

        # Column labels that are not valid Python identifiers (spaces, dashes,
        # ...) must be backtick-quoted to be usable inside a query expression.
        if column in filtered_data.columns and not column.isidentifier():
            column = f"`{column}`"

        if operation == '=':
            # String values become a properly escaped quoted literal; any other
            # value is compared as-is so that equality on numeric columns
            # compares numbers instead of a number against a string.
            literal = repr(str(value)) if isinstance(value, str) else value
            query_string = f"{column} == {literal}"
        else:
            # Handle other conditions (e.g., '<', '>', etc.)
            query_string = f"{column} {operation} {value}"

        filtered_data = filtered_data.query(query_string)

    return filtered_data

# Requirements a configuration has to meet, listed in order of priority.
# Each (column, operation, value) triple is expanded into the dict layout that
# filter_with_conditions expects; commented-out triples are currently disabled.
conditions = [
    {'column': name, 'operation': op, 'value': limit}
    for name, op, limit in (
        # ('Uplink_delay', '<', 100),
        ('Communication_link', '=', 'Down Link'),
        ('Downlink_delay', '<', 50),
        # ('Uplink_pcktloss', '<', 2),
        ('Downlink_pcktloss', '<', 1),
        ('sta_nrg', '<', 57),
        # ('Uplink_thrpt', '<', 1),
        ('Downlink_thrpt', '<', 1),
    )
]

# Filter the rows, then remove the model's prediction column so that it
# appears neither in the exported file nor in the printed table
optimal_configurations = filter_with_conditions(data, conditions).drop(
    columns=['Predicted_Uplink_thrpt']
)

# Save the matching rows to a CSV file (row index omitted)
optimal_configurations.to_csv('Smart_homes.csv', index=False)

# Show the matching rows
print(
    "Optimal Configurations to achieve the required conditions:",
    optimal_configurations,
    sep="\n",
)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Optimal Configurations to achieve the required conditions:
      Communication_link Power_save_mechanism Generated_traffic  \
4771           Down Link                  PSM           Poisson   
4932           Down Link                  PSM           Poisson   
5119           Down Link                  PSM           Poisson   
5632           Down Link                  PSM           Poisson   
5669           Down Link                  PSM           Poisson   
7128           Down Link                  PSM           Poisson   
7343           Down Link                  PSM          Periodic   
8945           Down Link                  PSM           Poisson   
8976           Down Link                  PSM           Poisson   
9055           Down Link                  PSM          Periodic   
9298           Down Link                  PSM           Poisson   
9693           Down Link                  PSM          Periodic   
10403          Down Link                  PSM          Periodic   
114