In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train a linear SVM on the diabetes dataset and classify one example record.
#
# Pipeline: load CSV -> print exploratory summaries -> split into train/test
# -> standardize (statistics learned from the training fold only) -> fit SVC
# -> report test accuracy -> predict a single hand-written sample.

# Load dataset (expects 'diabetes.csv' in the current working directory,
# containing an 'Outcome' column that is used as the class label)
diabetes_df = pd.read_csv('diabetes.csv')

# Display basic information about the dataset
print(diabetes_df.head(5))
print(diabetes_df.shape)
print(diabetes_df['Outcome'].value_counts())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
diabetes_df.info()
print(diabetes_df.describe())
print(diabetes_df.groupby('Outcome').mean())

# Separate features and target variable
X = diabetes_df.drop('Outcome', axis=1)
y = diabetes_df['Outcome']

# Split data into training and testing sets BEFORE scaling, so that the test
# rows cannot influence the mean/variance the scaler learns (data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Standardize the features: fit on the training fold only, then apply the
# same transformation to the test fold.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize SVM classifier
model = SVC(kernel='linear')

# Train the model
model.fit(X_train, y_train)

# Predict on test data
test_y_pred = model.predict(X_test)

# Evaluate accuracy (scikit-learn metric signature is (y_true, y_pred))
print('Test set Accuracy:', accuracy_score(y_test, test_y_pred))

# Example input data for prediction.
# NOTE(review): the values are assumed to be listed in the same order as the
# feature columns of the CSV -- confirm against the dataset header.
input_data = [[1, 85, 66, 29, 0, 26.6, 0.351, 31]]
input_data_nparray = np.asarray(input_data)
reshaped_input_data = input_data_nparray.reshape(1, -1)

# The model was trained on standardized features, so the raw sample must go
# through the SAME fitted scaler before prediction. Wrapping it in a DataFrame
# with the training column names keeps the scaler's feature-name check quiet.
input_frame = pd.DataFrame(reshaped_input_data, columns=X.columns)
scaled_input_data = scaler.transform(input_frame)

# Make prediction using the trained model
prediction = model.predict(scaled_input_data)

# Interpret the prediction (predict returns a length-1 array for one sample)
if prediction[0] == 1:
    print('This person has diabetes')
else:
    print('This person does not have diabetes')

# Additional output for checking the first 100 rows of the dataset
print(diabetes_df.head(100))


ModuleNotFoundError: No module named 'numpy'