In [2]:
import re
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [13]:
def extract_features(polynomials, variable):
    """Compute 14 features describing how `variable` occurs in `polynomials`.

    Variables are recognised as ``x<digits>`` with an optional ``^<digits>``
    exponent. A "term" here means one regex match of such a variable
    occurrence, not an algebraic monomial.

    Args:
        polynomials: Sequence of polynomial strings.
        variable: Name of the variable to describe, e.g. ``"x1"``.

    Returns:
        list: ``[E1, ..., E14]`` in this order:
            E1   number of distinct other variables that share a polynomial
                 with `variable`
            E2   number of polynomials containing `variable`
            E3   largest exponent of `variable` (a missing exponent counts as 1)
            E4   sum of the exponents over all occurrences of `variable`
            E5   always 0 (never computed by this function)
            E6   always 0 (never computed by this function)
            E7   same value as E3
            E8   same value as E4
            E9   summed length of the optional ``$``-prefixed text matched in
                 front of each occurrence, after removing ``"$*"``
            E10  number of occurrences of `variable`
            E11  E2 / len(polynomials), or 0 for an empty input
            E12  E10 / number of variable occurrences in all polynomials,
                 or 0 when there are none
            E13  largest number of distinct other variables in one polynomial
                 containing `variable` (simplified: identical to E14)
            E14  same value as E13
    """
    # (?!\d) keeps e.g. "x1" from matching the leading part of "x10"; the name
    # is escaped so it is always matched literally.
    term_pattern = re.compile(rf"(\$\*?\d*)?{re.escape(variable)}(?!\d)\^?(\d*)")
    all_term_pattern = re.compile(r"(\$\*?\d*)?x\d+\^?\d*")  # any variable occurrence

    other_variables = set()
    polys_with_variable = 0
    max_degree = 0
    degree_sum = 0
    coeff_length_sum = 0
    occurrences = 0
    total_terms = 0
    max_other_variables = 0

    for poly in polynomials:
        # Counted for every polynomial so that E12 is a proportion over the
        # whole input, not only over the polynomials that contain `variable`.
        total_terms += len(all_term_pattern.findall(poly))

        terms_with_x = term_pattern.findall(poly)
        if not terms_with_x:
            continue  # `variable` does not occur in this polynomial

        polys_with_variable += 1
        occurrences += len(terms_with_x)

        other_variables_in_poly = set(re.findall(r"x\d+", poly)) - {variable}
        other_variables |= other_variables_in_poly
        max_other_variables = max(max_other_variables, len(other_variables_in_poly))

        for coeff, degree in terms_with_x:
            degree = int(degree) if degree else 1  # bare "x1" has exponent 1
            max_degree = max(max_degree, degree)
            degree_sum += degree
            coeff_length_sum += len(coeff.replace('$*', ''))

    prop_polys = polys_with_variable / len(polynomials) if polynomials else 0
    prop_terms = occurrences / total_terms if total_terms else 0

    return [
        len(other_variables), polys_with_variable, max_degree, degree_sum, 0, 0,
        max_degree, degree_sum, coeff_length_sum, occurrences, prop_polys, prop_terms,
        max_other_variables, max_other_variables]

def get_polynomials(file_path, header_lines=2):
    """Read the polynomial strings stored one per line in `file_path`.

    Args:
        file_path: Path of the text file to read.
        header_lines: Number of leading lines to skip before the polynomials
            start (default 2, the value previously hard-coded).

    Returns:
        list[str]: The remaining lines with surrounding whitespace removed.
        Blank lines are dropped so they are not counted as polynomials.
    """
    polynomials = []
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file):
            if line_number < header_lines:
                continue
            text = line.strip()
            if text:  # an empty line is not a polynomial
                polynomials.append(text)
    return polynomials

def get_features(polynomials, variables=("x1", "x2", "x3")):
    """Return one `extract_features` row per variable, in the order given.

    Args:
        polynomials: Sequence of polynomial strings.
        variables: Variable names to describe; defaults to x1, x2, x3, the
            three names that were previously hard-coded.

    Returns:
        list[list]: ``extract_features(polynomials, v)`` for each ``v`` in
        `variables`.
    """
    return [extract_features(polynomials, variable) for variable in variables]
    
def get_graph(polynomials):
    """Build the variable co-occurrence graph of a list of polynomials.

    Every ``x<digits>`` name found in any polynomial becomes a node, and two
    nodes are joined by an edge when both names occur in the same polynomial.

    Args:
        polynomials: Sequence of polynomial strings.

    Returns:
        networkx.Graph: Undirected graph whose nodes are inserted in ascending
        numeric variable order (x1, x2, ..., x10, ...).
    """
    def variable_index(name):
        # Names match r'x\d+', so everything after the leading 'x' is digits.
        return int(name[1:])

    variables_per_poly = [set(re.findall(r'x\d+', poly)) for poly in polynomials]

    G = nx.Graph()

    # Insert nodes in a fixed order: iterating a set of strings has no stable
    # order between interpreter runs, which would otherwise make the node
    # order (and any matrix derived from it) irreproducible.
    all_variables = set().union(*variables_per_poly)
    G.add_nodes_from(sorted(all_variables, key=variable_index))

    # Connect each unordered pair of co-occurring variables once.
    for variables in variables_per_poly:
        ordered = sorted(variables, key=variable_index)
        for position, var1 in enumerate(ordered):
            for var2 in ordered[position + 1:]:
                G.add_edge(var1, var2)
    return G

import os

def save_graph(file_name, output_dir='gnn_data'):
    """Write the features and adjacency matrix of one polynomial file to CSV.

    The output file is ``<output_dir>/<basename of file_name>_combined.csv``.
    It contains a ``# Features`` section (one row per variable returned by
    `get_features`) followed by a ``# Adjacency Matrix`` section.

    Args:
        file_name: Path of the polynomial file to process.
        output_dir: Directory to write into; created if missing. Defaults to
            the previously hard-coded ``'gnn_data'``.
    """
    polynomials = get_polynomials(file_name)
    features = get_features(polynomials)
    G = get_graph(polynomials)

    # Order rows/columns by numeric variable index so the matrix layout is
    # reproducible and follows the same x1, x2, x3 order as the feature rows.
    # NOTE(review): the graph only contains variables that actually occur, so
    # the matrix can have fewer rows than the feature section - confirm the
    # consumer handles that.
    node_order = sorted(G.nodes, key=lambda name: int(name[1:]))
    adjacency_matrix = nx.to_numpy_array(G, nodelist=node_order)

    # Only the base name is used, so the output never mirrors the input's
    # directory structure.
    base_file_name = os.path.basename(file_name)
    combined_file_path = os.path.join(output_dir, f'{base_file_name}_combined.csv')

    # Ensure the target directory exists (skipped when writing to the CWD)
    target_dir = os.path.dirname(combined_file_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(combined_file_path, 'w') as file:
        file.write('# Features\n')
        np.savetxt(file, features, delimiter=',', fmt='%s')
        file.write('# Adjacency Matrix\n')
        np.savetxt(file, adjacency_matrix, delimiter=',', fmt='%s')


In [18]:


# Index file listing the polynomial files to convert (assumed tab-separated)
csv_file_path = 'balanced_processed.csv'
# Directory that holds the files named in the 'input_file' column
polys_dir = "C:/Users/rohit/OneDrive/Documents/CAD_repo/metitarski_data/polys/"

# Parse the index once, with the tab delimiter; a first parse with the default
# comma delimiter would only be thrown away.
df = pd.read_csv(csv_file_path, delimiter='\t')

# Clean up column names by stripping tab characters and whitespace
df.columns = df.columns.str.strip()

# Print the cleaned column names to verify
print(df.columns)

# Write one combined feature/adjacency file per listed polynomial file
for input_file in df['input_file']:
    save_graph(polys_dir + input_file)

Index(['Unnamed: 0', 'file_id', 'input_file', 'label_file', 'nr_polynomials',
       'max_total_degree', 'max_x1', 'max_x2', 'max_x3', 'prop_x1', 'prop_x2',
       'prop_x3', 'prop_mon_x1', 'prop_mon_x2', 'prop_mon_x3', 'label'],
      dtype='object')


KeyboardInterrupt: 

In [15]:
import pandas as pd
import os

# Index file listing the polynomial files and their labels (assumed tab-separated)
csv_file_path = 'balanced_processed.csv'
df = pd.read_csv(csv_file_path, delimiter='\t')  # Adjust based on your file's actual delimiter

# Strip tabs/whitespace from the header names, as the conversion cell does, so
# the 'input_file' and 'label' lookups do not depend on header formatting.
df.columns = df.columns.str.strip()

# Ensure the label output directory exists
gnn_data_dir = 'C:/Users/rohit/OneDrive/Documents/CAD_repo/gnn_data/gnn_labels'
os.makedirs(gnn_data_dir, exist_ok=True)

# Write each label to '<basename of input_file>_label.txt'
for input_file_name, label in zip(df['input_file'], df['label']):
    base_file_name = os.path.basename(input_file_name)
    label_file_path = os.path.join(gnn_data_dir, f'{base_file_name}_label.txt')

    with open(label_file_path, 'w') as label_file:
        label_file.write(str(label))
