In [1]:
import pandas as pd
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Step 1: Load your data from the Excel file
data = pd.read_excel('training (2) (1).xlsx')

# Remove rows with missing values in the 'input' column
data = data.dropna(subset=['input'])

# Assuming 'input' is your feature column, and 'Classification' is your target variable.
X_train = data['input'].values
y_train = data['Classification'].values

# Handle missing values by replacing them with an empty string
X_train = ["" if pd.isnull(x) else x for x in X_train]

# Step 2: Convert mathematical expressions to numerical features using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)

# Step 3: Initialize and train the SVM
clf = svm.SVC()
clf.fit(X_train, y_train)

# Step 4: Get the support vectors
support_vectors = clf.support_vectors_

# Step 5: Study the support vectors
# You can now analyze the support vectors to understand their influence on the decision boundary.

# Optional: Evaluate the SVM on a test set (if available)
# X_test = ...
# y_test = ...
# y_pred = clf.predict(X_test)
# ... (evaluation metrics)

# Optional: Save the trained SVM model for future use
# import joblib
# joblib.dump(clf, 'svm_model.pkl')
# After fitting the SVM model
print("SVM trained successfully")

# After getting the support vectors
print("Support vectors:", support_vectors)


SVM trained successfully
Support vectors:   (0, 265)	1.0
  (0, 476)	1.0
  (0, 822)	1.0
  (1, 146)	1.0
  (1, 290)	1.0
  (1, 291)	1.0
  (1, 788)	1.0
  (1, 938)	1.0
  (1, 940)	2.0
  (1, 995)	1.0
  (1, 1006)	1.0
  (1, 1021)	1.0
  (1, 1117)	2.0
  (1, 1157)	1.0
  (1, 1171)	1.0
  (1, 1203)	1.0
  (1, 1315)	2.0
  (1, 1334)	1.0
  (1, 1369)	1.0
  (1, 1376)	1.0
  (1, 1396)	1.0
  (2, 146)	2.0
  (2, 290)	1.0
  (2, 291)	1.0
  (2, 310)	1.0
  :	:
  (778, 52)	2.0
  (778, 104)	1.0
  (778, 123)	2.0
  (778, 225)	3.0
  (778, 255)	5.0
  (778, 295)	1.0
  (778, 354)	1.0
  (778, 383)	1.0
  (778, 468)	3.0
  (778, 552)	2.0
  (778, 629)	1.0
  (778, 656)	1.0
  (778, 669)	3.0
  (778, 672)	4.0
  (779, 52)	1.0
  (779, 104)	1.0
  (779, 123)	3.0
  (779, 225)	2.0
  (779, 255)	3.0
  (779, 468)	2.0
  (779, 491)	1.0
  (779, 552)	1.0
  (779, 629)	1.0
  (779, 669)	3.0
  (779, 672)	3.0


In [3]:
import pandas as pd

# Load the test data
test_data = pd.read_excel('testing (2) (1).xlsx')

# Assuming 'Equation' is your feature column, 'output' is the output column.
X_test = test_data['Equation'].values
y_test = (test_data['output'] > 3.5).astype(int)  # Convert output to binary classification

# Handle missing values by replacing them with an empty string
X_test = ["" if pd.isnull(x) else x for x in X_test]

# Convert mathematical expressions to numerical features using CountVectorizer
X_test = vectorizer.transform(X_test)

# Evaluate the SVM on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = (y_pred == y_test).mean()
print(f'Accuracy: {accuracy}')

Accuracy: 0.7857142857142857


In [4]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the dataset from the Excel file
df = pd.read_excel("testing (2) (1).xlsx")

# Inspect unique values in 'Equation' and 'Output' columns
print("Unique Values in 'Equation':", df['Equation'].unique())
print("Unique Values in 'output':", df['output'].unique())

# Label encode non-numeric values in 'Equation' and 'Output'
label_encoder = LabelEncoder()
df['Equation'] = label_encoder.fit_transform(df['Equation'])
df['output'] = label_encoder.fit_transform(df['output'])

# Assume 'Equation' and 'Output' are your features, and 'Classification' is the target variable
X = df[['Equation', 'output']]
y = df['Classification']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the SVM classifier
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Use the predict function to get predicted labels for the test set
y_pred = clf.predict(X_test)

# Create a DataFrame to display the results
results_df = pd.DataFrame({'Equation': df.loc[X_test.index, 'Equation'],
                           'Output': df.loc[X_test.index, 'output'],
                           'Actual Classification': y_test,
                           'Predicted Classification': y_pred})

# Display the DataFrame
print(results_df)

# Calculate and print the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Unique Values in 'Equation': ['px2+px+k=0 \n7((-5)2-5)+k=0\n 7(25-5)+k=0\n140+k=0\nk=-140'
 '3) 2x2+Px 15 to 2(-5)2x² + Px -15 = 0·\n2 (-5) ² + P(-5) - 15=0\n50-5P=15\n5P=35\nP= 35/5 => P=7\np{x² + x) +k=0\nPx² +Px +k = 0\n7x²+7x²+k=0\nb²-4ac=0\n(7) ²-4(1)(k)=0\n49 = 28k\n49=28k\nk=49/28 => k=7/4 K=1.752+ (P(-5)-15=0 50-5P=15 SP=35 P=3515 P=7 etx2+x) tk= Px2 +Pn+K=0 7x27xtk+0 b2-4ac=0 (7) - 4G7(K) - 0 49=28k. K=49 K=1 K=1.75 28 4'
 '2x2+px-15=0 α=-5 αβ==-15/2 α+β=-b/a β=3/2 -5+3/2=-P/2 \nP=7 \npx2+px+k=0 has equal roots\n 7(x2+x)+k=0\nb2-4ac=0 \n(-7)2-4(-7)K=0 \nk=7/4'
 'Given \n2x2+px-15=0\n-5 root -> x=-5 \n2(-5)2+5p-15=0\n50-5p-15=0 \n35-5p=0 \n5=5P \np=7 \np(x2+x)+k=0 \npx2+px+k=0 \n7x2+7x+k=0 \nEqual roots = b2-4ac=0 \n(-7)2-4(-7)K=0\n49-28k=0\nk=7/4'
 'd=192km t=2hrs u0= x\nut = x+16\ns=d/t= 192/2=96kmph st= 96+16=112kmph'
 'd=192\ns=x+16 p=x\ntime taken by s = y-2 time taken by p = y\nx+16= 192/(y-2)(speed component of speed train) x=192/y (speed component of passenger train)'
 

In [5]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the training dataset
train_df = pd.read_excel("training (2) (1).xlsx")

# Load the testing dataset
test_df = pd.read_excel("testing (2) (1).xlsx")

# Inspect unique values in 'input' and 'output' columns in the training dataset
print("Unique Values in 'input':", train_df['input'].unique())
print("Unique Values in 'output':", train_df['output'].unique())

# Label encode non-numeric values in 'input' and 'output' columns in the training dataset
label_encoder_train = LabelEncoder()
train_df['input'] = label_encoder_train.fit_transform(train_df['input'])
train_df['output'] = label_encoder_train.fit_transform(train_df['output'])

# Inspect unique values in 'Equation' and 'output' columns in the testing dataset
print("Unique Values in 'Equation':", test_df['Equation'].unique())
print("Unique Values in 'output':", test_df['output'].unique())

# Label encode non-numeric values in 'Equation' and 'output' columns in the testing dataset
label_encoder_test = LabelEncoder()
test_df['Equation'] = label_encoder_test.fit_transform(test_df['Equation'])
test_df['output'] = label_encoder_test.fit_transform(test_df['output'])

# Assume 'input' and 'output' are your features, and 'Classification' is the target variable in the training dataset
X_train = train_df[['input', 'output']]
y_train = train_df['Classification']

# Assume 'Equation' and 'output' are your features in the testing dataset
X_test = test_df[['Equation', 'output']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# List of kernel functions to experiment with
kernel_functions = ['linear', 'poly', 'rbf', 'sigmoid']

# Iterate over each kernel function and train the SVM classifier
for kernel_function in kernel_functions:
    # Create and train the SVM classifier
    clf = SVC(kernel=kernel_function)
    clf.fit(X_train, y_train)

    # Use the predict function to get predicted labels for the test set
    y_pred = clf.predict(X_test)

    # Calculate and print the accuracy for each kernel function
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy with {kernel_function} kernel: {accuracy}")

Unique Values in 'input': ['px2+px+k=0 \n7((-5)2-5)+k=0\n 7(25-5)+k=0\n140+k=0\nk=-140'
 'X= - b ± √b2 – 4ac/2a\t\nx=  - 3 ± √(3)2 -4(4)5/2(4)\t\n'
 'x2 + kx + 64=0 \nx - 8x +k =0 \nb - 4ac>= 0\n k2- 4(1)(64) >0 \n' ...
 '(7P+2)²-4 (2P+1) (7P-3)\n= 49P²+4+23P - 4 (14P-6P+7P-3)\n= 49P ² + 4 +28p - 56P² - 4P +12=0\n -7P² +24 P+16=0\n 7P+4=0, P-4=0\n7P²-24P-16=0\n7p²-28P+48-16=0\n7P (P-4)+4(P-4)=0\n(7p+4) (P-4)\n7P=-4 P=4\nP = -4/7, P=4'
 '(7p+2)² - 4(2p + 1)(7p - 3) = 0\n49p²+ 4 + 28p - 4[14p² - 6p + 7p - 3] = 0\n49p² + 4 + 28p - (56p² - 24p + 28p - 12) + 0\n- 7p² + 32p - 8 + 0 7p² - 37p + 8 = 0\n49p²+ 4 + 28p - 56p² + 74p - 28p + 12 = 0\n- 7p² + 24p + 16 = 0\n7p² - 24p - 16 = 0                                                      - p =4,4/7'
 '49p²+ 4 + 28p - 4(14p² - 6p + 7p - 3) = 0\n49p² + 4 + 28p - 56p² - 4 + 12 = 0\n- 7p² + 24p + 16 = 0\n7p² - 24p - 16 = 0\n7p² - 28p + 4p - 16 = 0\n7p(p - 4) + u(p - 4) = 0\n(p - 4)(7p + 4) = 0\nP = - 4 / 7 ,P = 4']
Unique Values in 'output': [0.   