In [1]:
import pandas as pd

# Step 1: Pick the source and destination files, then read the source CSV
input_file = 'primes_py/18001 to 20000 primes_multiplier_each_number.csv'  # CSV whose columns get reordered
data = pd.read_csv(input_file)
output_file = 'sorted_columns.csv'  # file the reordered table is written to

# Step 2: Extract and sort column headers
# Assuming headers are in the format 'number_plus/minus_index'
def sort_key(col):
    """Return a sort key for a header of the form '<number>_<sign>_<index>'.

    Parameters
    ----------
    col : str
        Column header to build a key for.

    Returns
    -------
    tuple
        ``(number, sign, index)`` with ``number`` and ``index`` converted to
        ``int`` when the header splits into exactly three underscore-separated
        parts and both numeric parts parse as integers. Any other header gets
        ``(inf, col, inf)``, which places it after every well-formed header
        and orders such headers among themselves by their text.
    """
    parts = col.split('_')
    if len(parts) == 3:
        number, sign, index = parts
        try:
            # Compare numerically so that e.g. 9 sorts before 10
            return (int(number), sign, int(index))
        except ValueError:
            # Three parts but not numeric (e.g. 'a_b_c'): treat it like any
            # other unexpected column instead of aborting the whole sort.
            pass
    return (float('inf'), col, float('inf'))  # Default for unexpected columns

# Order the headers with sort_key (list.sort is stable, exactly like sorted())
sorted_columns = list(data.columns)
sorted_columns.sort(key=sort_key)

# Step 3: Build a frame whose columns follow the sorted header order
sorted_data = data.loc[:, sorted_columns]

# Step 4: Write the reordered table out, without the row index
sorted_data.to_csv(output_file, index=False)
print(f"Columns sorted and saved to {output_file}")


Columns sorted and saved to sorted_columns.csv


In [None]:

import pandas as pd
import re

# Step 1: Read the CSV file
input_file = '15001 to 18000 primes_multiplier_each_number.csv'  # Replace with your input file path

df = pd.read_csv(input_file)

# Step 2: Identify Column Pairs
# Only headers that start with '<base>_plus_1' / '<base>_minus_1' are paired.
# re.match anchors the start of the header only, so without the negative
# lookahead a longer index such as '<base>_plus_10' would also match on its
# '_1' prefix and overwrite the real index-1 column of the pair.
pair_pattern = re.compile(r'(\d+)_(plus|minus)_1(?!\d)')
column_pairs = {}
for column in df.columns:
    match = pair_pattern.match(column)
    if match:
        base_number, suffix = match.groups()
        pair = column_pairs.setdefault(base_number, {'plus': None, 'minus': None})
        pair[suffix] = column

# Step 3: Filter Rows
filtered_data = []
unpaired = []  # base numbers for which only one of the two columns exists
for base_number, columns in column_pairs.items():
    plus_column = columns['plus']
    minus_column = columns['minus']

    # Without both columns there is nothing to compare; df[None] would raise KeyError
    if plus_column is None or minus_column is None:
        unpaired.append(base_number)
        continue

    # Keep rows where both columns hold the same non-zero value
    # (NaN never compares equal, so rows with missing values are dropped too)
    matching_rows = (df[plus_column] == df[minus_column]) & (df[plus_column] != 0)
    filtered_data.append(df.loc[matching_rows, [plus_column, minus_column]])

if unpaired:
    print(f"Skipped base numbers without both plus and minus columns: {unpaired}")

# Step 4: Combine Filtered Data
# Step 5: Compact Values
if filtered_data:
    # Side-by-side concat aligns on the original row labels, leaving NaN gaps
    combined_df = pd.concat(filtered_data, axis=1)
    # Push the surviving values of each column to the top; row order is kept, nothing is sorted
    sorted_df = combined_df.apply(lambda x: pd.Series(x.dropna().values))
else:
    # pd.concat raises ValueError on an empty list, so fall back to empty frames
    combined_df = pd.DataFrame()
    sorted_df = pd.DataFrame()

# Step 6: Export to CSV
output_file = 'organized_output15001.csv'  # Replace with your desired output file path
sorted_df.to_csv(output_file, index=False)

print(f"Organized data has been exported to {output_file}")


Organized data has been exported to organized_output15001.csv


In [19]:
import pandas as pd

# Step 1: Load the previously organized CSV
output_file = 'organized_output30001.csv'  # Replace with your output file path
df = pd.read_csv(output_file)

# Step 2: Work out how many columns make up 90% of the table (truncated to a whole number)
total_columns = len(df.columns)
top_90_percent_columns = int(total_columns * 0.9)

# Step 3: Keep the columns holding the most values
# Count the non-NaN entries per column, then take the headers of the
# best-populated ones; the result is ordered by count, largest first.
non_nan_counts = df.count()
top_90_percent_columns_list = non_nan_counts.nlargest(top_90_percent_columns).index
top_90_percent_df = df.loc[:, top_90_percent_columns_list]

# Step 4: Write the selected columns out, without the row index
top_90_percent_output_file = '30001top_90_percent_columns_output.csv'  # Replace with your desired output file path
top_90_percent_df.to_csv(top_90_percent_output_file, index=False)

print(f"Top 90% columns data has been exported to {top_90_percent_output_file}")


Top 90% columns data has been exported to 30001top_90_percent_columns_output.csv


In [3]:
import pandas as pd
import re

# Step 1: Read the CSV file
input_file = '30001 to 32000 primes_multiplier_each_number.csv'  # Replace with your input file path

df = pd.read_csv(input_file)

# Step 2: Identify Column Pairs
# Only headers that start with '<base>_plus_1' / '<base>_minus_1' are paired.
# re.match anchors the start of the header only, so without the negative
# lookahead a longer index such as '<base>_plus_10' would also match on its
# '_1' prefix and overwrite the real index-1 column of the pair.
pair_pattern = re.compile(r'(\d+)_(plus|minus)_1(?!\d)')
column_pairs = {}
for column in df.columns:
    match = pair_pattern.match(column)
    if match:
        base_number, suffix = match.groups()
        pair = column_pairs.setdefault(base_number, {'plus': None, 'minus': None})
        pair[suffix] = column

# Step 3: Convert Columns to Sets and Calculate Intersections
intersections = {}
for base_number, columns in column_pairs.items():
    plus_column = columns['plus']
    minus_column = columns['minus']

    # Without both columns there is nothing to intersect; df[None] would raise KeyError
    if plus_column is None or minus_column is None:
        print(f"Skipping base number {base_number}: plus or minus column is missing")
        continue

    # Convert columns to sets, excluding zeros
    plus_set = set(df[plus_column][df[plus_column] != 0])
    minus_set = set(df[minus_column][df[minus_column] != 0])

    # Values present in both the plus and the minus column
    intersections[base_number] = plus_set.intersection(minus_set)

# Step 4: Create a DataFrame with Base Numbers as Headers
# The longest intersection fixes the row count; default=0 keeps this from
# raising ValueError when no column pairs were found at all.
max_length = max((len(intersection) for intersection in intersections.values()), default=0)

# Step 5: Sort Values within Each Column
# Each column is its intersection in ascending order; the constructor aligns
# the Series on the shared row index and pads shorter columns with NaN.
intersection_df = pd.DataFrame(
    {
        base_number: pd.Series(sorted(intersection))
        for base_number, intersection in intersections.items()
    },
    index=range(max_length),
)

# Step 6: Export to CSV
intersection_output_file = 'intersections_output2.csv'  # Replace with your desired output file path
intersection_df.to_csv(intersection_output_file, index=False)

print(f"Intersections data has been exported to {intersection_output_file}")



Intersections data has been exported to intersections_output2.csv


In [4]:
import pandas as pd
import re

# Step 1: Read the CSV file
input_file = '30001 to 32000 primes_multiplier_each_number.csv'  # Replace with your input file path

df = pd.read_csv(input_file)

# Step 2: Identify Column Pairs
# Only headers that start with '<base>_plus_1' / '<base>_minus_1' are paired.
# re.match anchors the start of the header only, so without the negative
# lookahead a longer index such as '<base>_plus_10' would also match on its
# '_1' prefix and overwrite the real index-1 column of the pair.
pair_pattern = re.compile(r'(\d+)_(plus|minus)_1(?!\d)')
column_pairs = {}
for column in df.columns:
    match = pair_pattern.match(column)
    if match:
        base_number, suffix = match.groups()
        pair = column_pairs.setdefault(base_number, {'plus': None, 'minus': None})
        pair[suffix] = column

# Step 3: Convert Columns to Sets and Calculate Intersections
intersections = {}
for base_number, columns in column_pairs.items():
    plus_column = columns['plus']
    minus_column = columns['minus']

    # Without both columns there is nothing to intersect; df[None] would raise KeyError
    if plus_column is None or minus_column is None:
        print(f"Skipping base number {base_number}: plus or minus column is missing")
        continue

    # Convert columns to sets, excluding zeros
    plus_set = set(df[plus_column][df[plus_column] != 0])
    minus_set = set(df[minus_column][df[minus_column] != 0])

    # Values present in both the plus and the minus column
    intersections[base_number] = plus_set.intersection(minus_set)

# Step 4: Create a DataFrame with Base Numbers as Headers
# The longest intersection fixes the row count; default=0 keeps this from
# raising ValueError when no column pairs were found at all.
max_length = max((len(intersection) for intersection in intersections.values()), default=0)

# Step 5: Sort Values within Each Column
# Each column is its intersection in ascending order; the constructor aligns
# the Series on the shared row index and pads shorter columns with NaN.
intersection_df = pd.DataFrame(
    {
        base_number: pd.Series(sorted(intersection))
        for base_number, intersection in intersections.items()
    },
    index=range(max_length),
)

# Step 6: Sort the Headers
# Headers are digit strings captured by the regex; order them by numeric value
# so that e.g. '9999' comes before '10000' (plain string sorting would not).
intersection_df = intersection_df.reindex(sorted(intersection_df.columns, key=int), axis=1)

# Step 7: Export to CSV
intersection_output_file = 'intersections_output_sorted.csv'  # Replace with your desired output file path
intersection_df.to_csv(intersection_output_file, index=False)

print(f"Intersections data has been exported to {intersection_output_file}")


Intersections data has been exported to intersections_output_sorted.csv


In [5]:
import pandas as pd

# Step 1: Load the sorted intersections table
intersections_output_file = 'intersections_output_sorted.csv'  # Replace with your input file path
df = pd.read_csv(intersections_output_file)

# Step 2: Tally the non-NaN entries under every header
value_counts = df.count()

# Step 3: Turn the tally into a two-column table (header, number of values)
counts_df = value_counts.rename_axis('base_number').reset_index(name='value_count')

# Step 4: Write the table out, without the row index
counts_output_file = 'value_counts_output.csv'  # Replace with your desired output file path
counts_df.to_csv(counts_output_file, index=False)

print(f"Value counts data has been exported to {counts_output_file}")


Value counts data has been exported to value_counts_output.csv
