In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Load the dataset (CSV fetched over the network from Google Drive).
url = "https://drive.google.com/uc?id=1mNnY9TxogefNRBmQQr8hl4sVhxaqEuvK"
data = pd.read_csv(url)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)

# Work on a 10% sample of each split so the experiments below run quickly.
train_data_small = train_data.sample(frac=0.1, random_state=42)
test_data_small = test_data.sample(frac=0.1, random_state=42)
# 'dastgah' is the target column; every other column is used as a feature.
X_train_small = train_data_small.drop(columns=['dastgah'])
y_train_small = train_data_small['dastgah']

# Encode non-numeric feature columns as integer codes.
# The value -> code mapping is learned from the training sample and reused for
# the test sample. Encoding each frame independently would let the same value
# receive different codes in train and test. Values that occur only in the
# test sample (and missing values) are encoded as -1.
non_numeric_columns = X_train_small.select_dtypes(include=['object']).columns
for column in non_numeric_columns:
    train_categorical = X_train_small[column].astype('category')
    X_train_small[column] = train_categorical.cat.codes
    test_data_small[column] = pd.Categorical(
        test_data_small[column],
        categories=train_categorical.cat.categories,
    ).codes

# Part B: SVM classification and cross-validation
# Separate the held-out sample into features and labels once, up front.
svm_test_features = test_data_small.drop(columns=['dastgah'])
svm_test_labels = test_data_small['dastgah']

svm = SVC(kernel='rbf')

# 3-fold cross-validation scores on the training sample. cross_val_score works
# on clones of the estimator, so `svm` itself is still unfitted afterwards.
scores = cross_val_score(svm, X_train_small, y_train_small, cv=3)

# Fit on the full training sample, then evaluate on the held-out sample.
svm.fit(X_train_small, y_train_small)
y_pred = svm.predict(svm_test_features)
conf_matrix = confusion_matrix(svm_test_labels, y_pred)
class_report = classification_report(svm_test_labels, y_pred)

# Part C: grid search for hyperparameter tuning
# `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels, so searching
# it together with kernel='linear' just repeats identical fits. The grid is
# split in two: the linear kernel is tuned over C alone, and the RBF kernel
# (the one used for the baseline SVM) is tuned over both C and gamma.
param_grid = [
    {'kernel': ['linear'], 'C': [1, 10]},
    {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.1, 0.01]},
]
# refit=True retrains the best configuration on the whole training sample,
# so `grid` can be used directly as a fitted classifier afterwards.
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=1, cv=3)
grid.fit(X_train_small, y_train_small)
best_params = grid.best_params_

# Part D: LDA and PCA for dimensionality reduction
def _scatter_by_class(ax, points, labels, title):
    """Draw a 2-D scatter plot of *points* on *ax*, one series per class.

    Parameters:
        ax: matplotlib axes to draw on.
        points: 2-D array; columns 0 and 1 are used as the x and y coordinates.
        labels: pandas Series of class labels, one per row of *points*.
        title: title text for the axes.
    """
    label_values = labels.to_numpy()
    # Sorted order keeps the legend and the colour assigned to each class
    # stable between runs; iterating a set of strings does not guarantee that.
    for class_value in sorted(labels.unique()):
        # Plain numpy mask, so the indexing does not depend on the Series index.
        mask = label_values == class_value
        ax.scatter(points[mask, 0], points[mask, 1], label=class_value)
    ax.set_title(title)
    ax.legend()


# Unsupervised projection onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train_small)

# Supervised projection onto two discriminant axes (uses the class labels).
lda = LDA(n_components=2)
X_lda = lda.fit_transform(X_train_small, y_train_small)

# Show both projections side by side.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
_scatter_by_class(ax1, X_pca, y_train_small, 'PCA Result')
_scatter_by_class(ax2, X_lda, y_train_small, 'LDA Result')
plt.show()

# Part E: feature extraction based on audio nature
# Placeholder: the autoencoder is currently trained on the existing features.
new_feature_df = X_train_small.copy()  # Replace with actual feature extraction process
input_dim = new_feature_df.shape[1]

# Symmetric dense autoencoder: input_dim -> 64 -> 32 (bottleneck) -> 64 -> input_dim.
input_layer = Input(shape=(input_dim, ))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)  # bottleneck representation
decoded = Dense(64, activation='relu')(encoded)
# Linear output: the inputs are not rescaled to [0, 1] (they include integer
# category codes), so a sigmoid output could never reproduce them and the MSE
# reconstruction loss would be stuck at a floor.
# NOTE(review): standardising the features before training would likely help
# further; not done here because the same transform would also have to be
# applied wherever the encoder is used.
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
# The input is also the target (reconstruction); 20% of the rows are held out
# for validation loss.
autoencoder.fit(new_feature_df, new_feature_df, epochs=10, batch_size=128, shuffle=True, validation_split=0.2)

# Part F: application of the new features for classification
# The encoder must end at the 32-unit bottleneck. `autoencoder.layers[1]` is
# the first Dense(64) layer (layers[0] is the input layer), so indexing there
# would skip the compression step. `encoded` is the bottleneck output tensor
# created when the autoencoder was built.
encoder = Model(inputs=autoencoder.input, outputs=encoded)
encoded_features = encoder.predict(new_feature_df)

# Train a random forest on the compressed training features.
rf_new = RandomForestClassifier(n_estimators=100, random_state=42)
rf_new.fit(encoded_features, y_train_small)

# Encode the held-out sample with the same encoder and evaluate on it.
new_features_test = encoder.predict(test_data_small.drop(columns=['dastgah']))
y_pred_new = rf_new.predict(new_features_test)
conf_matrix_new = confusion_matrix(test_data_small['dastgah'], y_pred_new)
class_report_new = classification_report(test_data_small['dastgah'], y_pred_new)

# Part G: model comparison and performance metrics
# NOTE(review): `y_pred` comes from the SVM and `y_pred_new` from the random
# forest, so the difference shown here reflects both the feature set and the
# classifier. Confirm that this is the intended comparison.
models = ['Original Features', 'New Features']
comparison_labels = test_data_small['dastgah']
accuracy_scores = [
    accuracy_score(comparison_labels, predictions)
    for predictions in (y_pred, y_pred_new)
]

# Bar chart of held-out accuracy, one bar per entry in `models`.
sns.barplot(x=models, y=accuracy_scores)
plt.title('Model Comparison')
plt.ylabel('Accuracy')
plt.show()

# Per-class precision / recall / F1 reports for both runs.
for report_header, report_text in (
    ("Original Features Classification Report:\n", class_report),
    ("New Features Classification Report:\n", class_report_new),
):
    print(report_header, report_text)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 3 folds for each of 4 candidates, totalling 12 fits
