In [1]:
import pandas as pd
# CSV exports to combine, one file per component category; each file's base
# name (the text before the first ".") is used as that category's label.
excel_files = [
    "batteryManagement.csv",
    "accelerometer.csv",
    "gyroscope.csv",
    "multivibrators.csv",
    "networkdevices.csv",
    "motors.csv",
    "microcontroller.csv",
]

# Load every file and tag its rows with the category taken from the file name.
dfs = []
for file in excel_files:
    df = pd.read_csv(file).assign(category=file.split(".")[0])
    dfs.append(df)

# Stack the per-category frames into one table with a fresh 0..n-1 index.
merged_df = pd.concat(dfs, ignore_index=True)

merged_df

Unnamed: 0,product_name,manufacturer,cleaned_datasheet_text,category
0,ADBMS6948,Analog Devices,preliminary data measures up to battery cells ...,batteryManagement
1,ADBMS6830,Analog Devices,data multicell battery monitor typical applica...,batteryManagement
2,ADRF8800,Analog Devices,data low power ghz wireless system on chip low...,batteryManagement
3,ADBMS6816,Analog Devices,typical application circuit data multicell bat...,batteryManagement
4,LTC2959,Analog Devices,power battery gas gauge descriptionthe is an p...,batteryManagement
...,...,...,...,...
900,ADUC836,Analog Devices,dual adcs with embedded kb flash mcu featuresh...,microcontroller
901,ADUC814,Analog Devices,small adc with embedded flash analog ksps adc ...,microcontroller
902,ADUC816,Analog Devices,microconverter adcs with embedded flash mcu ad...,microcontroller
903,ADUC824,Analog Devices,microconverter adcs with embedded flash mcu fe...,microcontroller


In [2]:
# Count the missing values in each column of the merged table.
merged_df.isna().sum()

product_name              0
manufacturer              0
cleaned_datasheet_text    1
category                  0
dtype: int64

In [2]:
# Drop every row that has a missing value in any column; the result is bound
# back to the same name so the following cells work on the cleaned table.
merged_df = merged_df.dropna()

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Model input is the cleaned datasheet text; the target is the category label.
X, y = merged_df['cleaned_datasheet_text'], merged_df['category']

# Map the category labels to integer class ids.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing, stratified on the category labels,
# with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Token-count features, filtered with the vectorizer's 'english' stop-word setting.
vectorizer = CountVectorizer(stop_words='english')

# Learn the vocabulary from the training text only, then encode the test text
# with that same fitted vectorizer.
X_train_count = vectorizer.fit_transform(X_train)
X_test_count = vectorizer.transform(X_test)

In [5]:
# Train a multinomial naive Bayes classifier on the training token counts
# (fit() returns the fitted estimator) and predict the held-out test set.
nb_classifier = MultinomialNB().fit(X_train_count, y_train)
y_pred = nb_classifier.predict(X_test_count)

# Overall accuracy, followed by the per-class classification report.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print(report)

Accuracy: 0.9668508287292817
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        45
           1       1.00      0.97      0.98        33
           2       0.88      0.78      0.82         9
           3       0.89      1.00      0.94         8
           4       1.00      0.95      0.97        20
           5       1.00      1.00      1.00         6
           6       0.97      1.00      0.98        60

    accuracy                           0.97       181
   macro avg       0.96      0.95      0.95       181
weighted avg       0.97      0.97      0.97       181



In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
# Train a 100-tree random forest (fixed seed) on the training token counts
# and predict the held-out test set.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_count, y_train)
y_pred = rf_classifier.predict(X_test_count)

# Overall accuracy, followed by the per-class classification report.
rf_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Random Forest Accuracy:", rf_accuracy)
print(report)

Random Forest Accuracy: 0.9447513812154696
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        45
           1       0.97      0.97      0.97        33
           2       1.00      0.78      0.88         9
           3       1.00      1.00      1.00         8
           4       1.00      0.70      0.82        20
           5       1.00      1.00      1.00         6
           6       0.94      0.98      0.96        60

    accuracy                           0.94       181
   macro avg       0.97      0.92      0.94       181
weighted avg       0.95      0.94      0.94       181



In [7]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
# Train a linear-kernel support vector classifier on the training token
# counts and predict the held-out test set.
svm_classifier = SVC(kernel='linear').fit(X_train_count, y_train)
y_pred = svm_classifier.predict(X_test_count)

# Overall accuracy, followed by the per-class classification report.
svm_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("SVM Accuracy:", svm_accuracy)
print(report)

SVM Accuracy: 0.9613259668508287
              precision    recall  f1-score   support

           0       0.98      0.96      0.97        45
           1       1.00      1.00      1.00        33
           2       0.90      1.00      0.95         9
           3       1.00      1.00      1.00         8
           4       1.00      0.80      0.89        20
           5       0.86      1.00      0.92         6
           6       0.94      0.98      0.96        60

    accuracy                           0.96       181
   macro avg       0.95      0.96      0.95       181
weighted avg       0.96      0.96      0.96       181

