In [1]:
import json
import joblib
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the small English spaCy pipeline; it is called on every description
# further down to obtain lemmas and stop-word / punctuation flags.
nlp = spacy.load('en_core_web_sm')

# Load the dataset. JSON is UTF-8 by specification, so decode it explicitly
# instead of relying on the platform's default text encoding (which differs
# between operating systems and can corrupt or reject non-ASCII text).
with open('ipc_sections.json', encoding='utf-8') as f:
    data = json.load(f)

# Prepare data: two parallel lists built from the entries of data['sections'],
# the 'description' values (model inputs) and the 'section' values (labels).
descriptions = [item['description'] for item in data['sections']]
sections = [item['section'] for item in data['sections']]

# Preprocess text using spaCy
def preprocess(text):
    """Return the lemmas of `text` joined by single spaces.

    The text is run through the module-level `nlp` pipeline. Tokens flagged
    as stop words or as punctuation are dropped; the lemma of every remaining
    token is kept, in order.
    """
    kept_lemmas = []
    for tok in nlp(text):
        if tok.is_stop or tok.is_punct:
            continue
        kept_lemmas.append(tok.lemma_)
    return ' '.join(kept_lemmas)

# Replace every raw description with its preprocessed form
descriptions = list(map(preprocess, descriptions))

# Debugging: show the preprocessed texts alongside their labels
print("Preprocessed Descriptions:", descriptions)
print("Sections:", sections)

# Split the data: 20% is held out for testing; the fixed seed keeps the
# partition identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    descriptions,
    sections,
    random_state=42,
    test_size=0.2,
)

# Debugging: report how many samples landed in each partition
print("Training set size:", len(X_train))
print("Test set size:", len(X_test))

# Define the model pipeline: token counts from CountVectorizer feed a linear SVM.
# LinearSVC's solver uses a random number generator while fitting, so it is
# seeded here (same seed as the train/test split) to make repeated runs of the
# notebook produce the same fitted model.
model = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', LinearSVC(random_state=42))
])

# Train the model on the training partition
model.fit(X_train, y_train)

# Evaluate the model: predict labels for the held-out texts and report the
# fraction that match the true labels
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_true=y_test, y_pred=y_pred)}')

# Debugging: predictions next to the ground truth
print("Predictions:", y_pred)
print("True Labels:", y_test)

# Save the fitted pipeline (vectorizer + classifier) to disk
joblib.dump(value=model, filename='ipc_model.pkl')


ModuleNotFoundError: No module named 'spacy'

In [2]:
pip install json joblib spacy scikit-learn
python -m spacy download en_core_web_sm


SyntaxError: invalid syntax (3401680075.py, line 1)

In [3]:
!pip install spacy scikit-learn joblib


Defaulting to user installation because normal site-packages is not writeable
Collecting spacy
  Downloading spacy-3.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB)
Collecting thinc<8.3.0,

In [5]:
!python -m spacy download en_core_web_sm


/bin/bash: line 1: python: command not found


In [6]:
python3 --version


NameError: name 'python3' is not defined