In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

## Biomechanical features of orthopedic patients

<a href="https://ibb.co/1ZQ9Y30"><img src="https://i.ibb.co/K6h9cY5/images.jpg" alt="images" border="0"></a>

**Content:**
1. Introduction to Python:
 * [SUPERVISED LEARNING](#1)
   * [Machine Learning]
   * [EDA(Exploratory Data Analysis)](#1)
   * [K-Nearest Neighbors (KNN)](#2)
  * [UNSUPERVISED LEARNING](#3)
   * [Kmeans Clustering](#4)
   * [Hierarchy](#5)
* Machine Learning Logistic Regression (Breast Cancer)
  *   https://www.kaggle.com/yaseraktas/machine-learning-with-breast-cancer
* [Conclusion](#6)

<a id="1"></a> <br>
## SUPERVISED LEARNING

### EXPLORATORY DATA ANALYSIS (EDA)

In [None]:
# Load the orthopedic-patients CSV (column_2C_weka.csv) into the `data` DataFrame
# that every later cell reads from.
data = pd.read_csv(
    '../input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv'
)

In [None]:
# Print a concise summary of the DataFrame (pandas `DataFrame.info`).
data.info()

In [None]:
# Preview the first rows of the dataset.
data.head()

In [None]:
# List the distinct labels present in the 'class' column.
data['class'].unique()

In [None]:
# Descriptive statistics of the dataset (pandas `DataFrame.describe`).
data.describe()

In [None]:
# Pairwise scatter matrix of every non-label column, coloured by class:
# cyan for 'Abnormal' rows, orange for all other rows.
color_list = ['cyan' if label == 'Abnormal' else 'orange' for label in data.loc[:, 'class']]
feature_frame = data.loc[:, data.columns != 'class']
pd.plotting.scatter_matrix(
    feature_frame,
    c=color_list,
    figsize=[17, 17],
    diagonal='hist',
    alpha=0.5,
    s=200,
    marker='*',
    edgecolor="black",
)

plt.show()

In [None]:
# Correlation heatmap of the numeric features.
# The 'class' column holds string labels, and DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0 (older versions silently dropped them),
# so restrict the frame to numeric columns before computing correlations.
plt.figure(figsize=(15,15))
numeric_features = data.select_dtypes(include='number')
sns.heatmap(numeric_features.corr(),annot=True,cmap='RdBu_r')
plt.show()

In [None]:
# Bar chart of how many rows fall into each class label.
sns.countplot(data=data, x="class")
plt.show()

In [None]:
# Split the rows by class label, then compare pelvic_incidence (x-axis)
# against sacral_slope (y-axis) for each class on one scatter plot.
data1 = data[data['class'] == 'Abnormal']
data2 = data[data['class'] == 'Normal']

# Each series becomes an (n, 1) column vector.
x = data1['pelvic_incidence'].values.reshape(-1, 1)
y = data1['sacral_slope'].values.reshape(-1, 1)
x2 = data2['pelvic_incidence'].values.reshape(-1, 1)
y2 = data2['sacral_slope'].values.reshape(-1, 1)

# One scatter call per class so each gets its own colour and legend entry.
plt.figure(figsize=[5, 5])
for xs, ys, colour, class_name in ((x, y, 'cyan', 'Abnormal'), (x2, y2, 'orange', 'Normal')):
    plt.scatter(x=xs, y=ys, color=colour, marker="*", label=class_name)
plt.xlabel('pelvic_incidence')
plt.ylabel('sacral_slope')
plt.legend()
plt.show()



In [None]:
# Number of rows per class label.
data['class'].value_counts()

In [None]:
# Preview the first rows again before modelling (same output as the earlier head() cell).
data.head()

<a id="2"></a> <br>
###  K-NEAREST NEIGHBORS (KNN)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Features are every column except the label; the label is the 'class' column.
x = data.drop(columns='class')
y = data['class']

# Fit a 3-nearest-neighbour classifier on the full dataset and predict the same rows.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x, y)
prediction = knn.predict(x)
print(f'Prediction: {prediction}')

In [None]:
# Hold-out evaluation: 70% of the rows for training, 30% for testing.
from sklearn.model_selection import train_test_split

# Build the feature matrix and label vector BEFORE splitting, so the split does
# not depend on x/y left over from an earlier cell.
x,y = data.loc[:,data.columns != 'class'], data.loc[:,'class']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state = 1)

# Fit on the training rows only, then predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(x_train,y_train)
prediction = knn.predict(x_test)

print('With KNN (K=5) accuracy is: ',knn.score(x_test,y_test)) # accuracy

In [None]:
# Display the labels predicted for the held-out test rows.
prediction

In [None]:
# Search for a good K: fit one KNN model per candidate K and record its
# accuracy on both the test split and the training split.
# NOTE(review): K is chosen on the test split here, so the reported best
# accuracy is optimistic — consider a separate validation split.
k_values = range(1, 25)
score_list = []       # test accuracy per K
train_accuracy = []   # training accuracy per K
for each in k_values:
    knn = KNeighborsClassifier(n_neighbors=each)
    knn.fit(x_train, y_train)
    score_list.append(knn.score(x_test, y_test))
    train_accuracy.append(knn.score(x_train, y_train))

plt.plot(k_values, score_list, label="Testing Accuracy", color="red")
plt.plot(k_values, train_accuracy, label='Training Accuracy', color="green")
plt.xlabel("Number of Neighbors")
plt.ylabel("accuracy")
plt.title('Value VS Accuracy')
# Draw the legend BEFORE saving; otherwise graph.png is written without it.
plt.legend()
plt.savefig('graph.png')
plt.show()

In [None]:
# Report the highest test accuracy and the K that produced it.
# The list index is zero-based while the K search starts at 1, hence the +1.
best_score = np.max(score_list)
best_k = 1 + score_list.index(best_score)
print("Best accuracy is {} with K = {}".format(best_score, best_k))

<a id="3"></a> <br>
## UNSUPERVISED LEARNING

<a id="4"></a> <br>
## KMEANS

In [None]:
# Preview the first rows again before clustering (same output as the earlier head() cells).
data.head()

In [None]:
# Unlabelled scatter of the two features that the clustering below works on.
x_feature, y_feature = 'pelvic_radius', 'degree_spondylolisthesis'
plt.scatter(data[x_feature], data[y_feature])
plt.xlabel(x_feature)
plt.ylabel(y_feature)
plt.show()

In [None]:
from sklearn.cluster import KMeans

# Two-feature frame used for clustering.
# Note: this rebinds `data2`, which an earlier cell used for the 'Normal' subset.
data2 = data.loc[:, ['degree_spondylolisthesis', 'pelvic_radius']]

# random_state makes the centroid initialisation (and so the cluster labels)
# reproducible across runs; n_init is set explicitly because its default
# differs between scikit-learn versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(data2)
labels = kmeans.predict(data2)

# Same scatter as before, now coloured by the assigned cluster.
plt.scatter(data['pelvic_radius'], data['degree_spondylolisthesis'], c=labels)
plt.xlabel('pelvic_radius')
plt.ylabel('degree_spondylolisthesis')
plt.show()

In [None]:
# Elbow method: fit KMeans for k = 1..14 and record the within-cluster sum of
# squares (the fitted model's `inertia_`) for each k.
cluster_counts = range(1, 15)
wcss = []
for i in cluster_counts:
    # Fixed random_state and explicit n_init keep the curve reproducible
    # across runs and across scikit-learn versions.
    kmeans = KMeans(n_clusters=i, n_init=10, random_state=42)
    kmeans.fit(data2)
    wcss.append(kmeans.inertia_)

plt.plot(cluster_counts, wcss, '-*')
plt.xlabel('Number of k (cluster value)')
plt.ylabel('Wcss')
plt.show()

<a id="5"></a> <br>
## HIERARCHY

In [None]:
# Feature-only copy of the dataset: every column except the 'class' label.
data3 = data.drop(columns='class')

In [None]:
# Preview the first rows of the label-free frame.
data3.head()

In [None]:
from scipy.cluster.hierarchy import linkage, dendrogram

# Hierarchical (agglomerative) clustering with Ward linkage.
# NOTE(review): this clusters `data2` (whatever an earlier cell left in that
# name), not the `data3` frame — confirm that is intended.
merg = linkage(data2, method="ward")

# Draw the dendrogram once; a second call on the same axes only re-plots the
# identical tree on top of itself.
dendrogram(merg, leaf_rotation=90)
plt.xlabel("data points")
plt.ylabel("euclidean distance")
plt.show()

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Single-linkage clustering on a small slice of the label-free frame
# (row positions 200 up to, but not including, 220).
subset = data3.iloc[200:220, :]
merg = linkage(subset, method='single')
dendrogram(merg, leaf_rotation=90, leaf_font_size=6)
plt.show()

<a id="6"></a> <br>
## CONCLUSION

Thank you for your votes and comments

If you have any suggestions, please share them with me; I will be happy to hear them.