## Descriptive Statistics in Python

In [51]:
# Setup: import pandas and define the dataset location and its column labels.
import pandas

# Column labels; read_csv applies them positionally through `names=`.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
# Raw CSV of the pima-indians-diabetes dataset, fetched over HTTPS from GitHub.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

In [52]:
# Load the CSV directly from the URL. Column labels are supplied by `names`;
# header=None is written out explicitly (pandas already behaves this way
# whenever `names` is passed), so the first line of the file is read as data.
data = pandas.read_csv(url, header=None, names=names)

### 1. Peek at Your Data

In [53]:
# Take the first 20 rows for a quick visual sanity check of the raw values.
peek = data.head(n=20)
print(peek)

    preg  plas  pres  skin  test  mass   pedi  age  class
0      6   148    72    35     0  33.6  0.627   50      1
1      1    85    66    29     0  26.6  0.351   31      0
2      8   183    64     0     0  23.3  0.672   32      1
3      1    89    66    23    94  28.1  0.167   21      0
4      0   137    40    35   168  43.1  2.288   33      1
5      5   116    74     0     0  25.6  0.201   30      0
6      3    78    50    32    88  31.0  0.248   26      1
7     10   115     0     0     0  35.3  0.134   29      0
8      2   197    70    45   543  30.5  0.158   53      1
9      8   125    96     0     0   0.0  0.232   54      1
10     4   110    92     0     0  37.6  0.191   30      0
11    10   168    74     0     0  38.0  0.537   34      1
12    10   139    80     0     0  27.1  1.441   57      0
13     1   189    60    23   846  30.1  0.398   59      1
14     5   166    72    19   175  25.8  0.587   51      1
15     7   100     0     0     0  30.0  0.484   32      1
16     0   118

### 2. Data Type For Each Attribute

In [54]:
# Per-column dtypes as inferred by read_csv (a Series indexed by column name).
types = data.dtypes
print(types)

preg       int64
plas       int64
pres       int64
skin       int64
test       int64
mass     float64
pedi     float64
age        int64
class      int64
dtype: object


### 3. Dimensions of Your Data

In [55]:
# (rows, columns) tuple for the loaded frame.
shape = data.shape
print(shape)

(768, 9)


### 4. Descriptive Statistics

In [60]:
# Summary statistics per attribute, transposed so each attribute is one row
# and the statistics (count, mean, std, quartiles, ...) are the columns.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
preg,768.0,3.845,3.37,0.0,1.0,3.0,6.0,17.0
plas,768.0,120.895,31.973,0.0,99.0,117.0,140.25,199.0
pres,768.0,69.105,19.356,0.0,62.0,72.0,80.0,122.0
skin,768.0,20.536,15.952,0.0,0.0,23.0,32.0,99.0
test,768.0,79.799,115.244,0.0,0.0,30.5,127.25,846.0
mass,768.0,31.993,7.884,0.0,27.3,32.0,36.6,67.1
pedi,768.0,0.472,0.331,0.078,0.244,0.372,0.626,2.42
age,768.0,33.241,11.76,21.0,24.0,29.0,41.0,81.0
class,768.0,0.349,0.477,0.0,0.0,0.0,1.0,1.0


### 5. Class Distribution (Classification Only)

In [61]:
# Number of rows for each distinct value of the 'class' column.
class_counts = data.groupby(by='class').size()
class_counts

class
0    500
1    268
dtype: int64

### 6. Correlation Between Attributes

In [58]:
# Display configuration for the tables shown in this notebook.
# Option keys are fully qualified: set_option treats the key as a pattern that
# must match exactly one option, and the bare 'precision' is ambiguous on
# newer pandas (it also matches 'styler.format.precision'), raising OptionError.
pandas.set_option('display.width', 100)
pandas.set_option('display.precision', 3)

In [62]:
# Pairwise Pearson correlation between every pair of columns in `data`.
# The result is a square frame indexed by column name on both axes; the bare
# expression on the last line lets the notebook render it as a table.
correlations = data.corr(method='pearson')
correlations

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
preg,1.0,0.129,0.141,-0.082,-0.074,0.018,-0.034,0.544,0.222
plas,0.129,1.0,0.153,0.057,0.331,0.221,0.137,0.264,0.467
pres,0.141,0.153,1.0,0.207,0.089,0.282,0.041,0.24,0.065
skin,-0.082,0.057,0.207,1.0,0.437,0.393,0.184,-0.114,0.075
test,-0.074,0.331,0.089,0.437,1.0,0.198,0.185,-0.042,0.131
mass,0.018,0.221,0.282,0.393,0.198,1.0,0.141,0.036,0.293
pedi,-0.034,0.137,0.041,0.184,0.185,0.141,1.0,0.034,0.174
age,0.544,0.264,0.24,-0.114,-0.042,0.036,0.034,1.0,0.238
class,0.222,0.467,0.065,0.075,0.131,0.293,0.174,0.238,1.0
