# Medidas de Posição e Dispersão

In [1]:
import pandas as pd
import numpy as np
import math
import statistics
from scipy import stats

In [2]:
# Sample of 40 integer observations used by the cells that follow.
dataset = np.array([160, 165, 167, 164, 160, 166, 160, 161, 150, 152, 173, 160, 155,
                    164, 168, 162, 161, 168, 163, 156, 155, 169, 151, 170, 164,
                    155, 152, 163, 160, 155, 157, 156, 158, 158, 161, 154, 161, 156, 172, 153])
# Sample size.
len(dataset)

40

## Média aritmética simples

In [3]:
# Arithmetic mean from its definition: sum of the values divided by their count.
dataset.sum()/len(dataset)

160.375

In [4]:
# Same mean using the array's built-in mean() method.
dataset.mean()

160.375

In [5]:
# NOTE(review): the recorded result here is 160, while the two cells above give
# 160.375. statistics.mean() converts its result back to the element type of the
# input, and for a NumPy integer array that drops the fractional part.
# Passing plain Python ints (e.g. dataset.tolist()) should give 160.375 — confirm.
statistics.mean(dataset)

160

## Moda

In [6]:
# Mode via the standard library: the most frequent value in the data.
statistics.mode(dataset)

160

In [7]:
# Mode via SciPy; the result also reports how many times the mode occurs.
stats.mode(dataset)

ModeResult(mode=array([160]), count=array([5]))

## Mediana

In [8]:
# Small, already sorted sample with an odd number of elements.
dados_impar = [150, 151, 152, 152, 153, 154, 155, 155, 155]
len(dados_impar)

9

### Ímpar

In [9]:
# With an odd number of sorted values the median is the single middle element.
# Its 1-based position is ceil(n / 2); subtract 1 for Python's 0-based indexing.
# The computed position is used instead of a hard-coded 5, so the cell keeps
# working if dados_impar changes size.
posicao = math.ceil(len(dados_impar)/2)
dados_impar[posicao - 1]

153

In [10]:
# Cross-check with NumPy.
np.median(dados_impar)

153.0

In [11]:
# Cross-check with the standard library.
statistics.median(dados_impar)

153

### Par

In [12]:
# Even number of observations: the median is the average of the two central
# values of the sorted data. Note that `dataset` is rebound to a sorted list here.
dataset = list(dataset)
dataset.sort()
posicao = len(dataset) // 2
sum(dataset[posicao - 1:posicao + 1]) / 2

160.0

In [13]:
# Cross-check with NumPy.
np.median(dataset)

160.0

In [14]:
# Cross-check with the standard library.
statistics.median(dataset)

160.0

## Média ponderada

In [15]:
# Grades and the weight given to each grade (same order in both arrays).
notas = np.array([9, 8, 7, 3])
pesos = np.array([1, 2, 3, 4])

In [16]:
# Weighted mean from its definition: sum of grade * weight over the sum of weights.
(notas * pesos).sum() / pesos.sum()

5.8

In [17]:
# Same weighted mean using NumPy's average() with explicit weights.
np.average(notas, weights=pesos)

5.8

## Dados agrupados

In [18]:
# Grouped frequency table: one row per class, with its lower limit, upper limit
# and simple frequency (fi). Built directly as a DataFrame so the name `dados`
# is never bound to a plain dict.
dados = pd.DataFrame({
    'inferior': [150, 154, 158, 162, 166, 170],
    'superior': [154, 158, 162, 166, 170, 174],
    'fi': [5, 9, 11, 7, 5, 3],
})
print(dados.shape)
dados

(6, 3)


Unnamed: 0,inferior,superior,fi
0,150,154,5
1,154,158,9
2,158,162,11
3,162,166,7
4,166,170,5
5,170,174,3


In [19]:
# Class midpoint (xi): average of the lower and upper limits of each class.
dados['xi'] = dados[['inferior', 'superior']].mean(axis=1)
dados

Unnamed: 0,inferior,superior,fi,xi
0,150,154,5,152.0
1,154,158,9,156.0
2,158,162,11,160.0
3,162,166,7,164.0
4,166,170,5,168.0
5,170,174,3,172.0


In [20]:
# Frequency times midpoint for each class; these products feed the grouped mean.
dados['fi.xi'] = dados['fi'].mul(dados['xi'])
dados

Unnamed: 0,inferior,superior,fi,xi,fi.xi
0,150,154,5,152.0,760.0
1,154,158,9,156.0,1404.0
2,158,162,11,160.0,1760.0
3,162,166,7,164.0,1148.0
4,166,170,5,168.0,840.0
5,170,174,3,172.0,516.0


In [21]:
# Cumulative frequency (Fi): running total of the class frequencies fi.
# Series.cumsum() computes it in a single pass and selects the column by name,
# instead of re-summing a positional slice of the frame for every row.
dados['Fi'] = dados['fi'].cumsum()
dados

Unnamed: 0,inferior,superior,fi,xi,fi.xi,Fi
0,150,154,5,152.0,760.0,5
1,154,158,9,156.0,1404.0,14
2,158,162,11,160.0,1760.0,25
3,162,166,7,164.0,1148.0,32
4,166,170,5,168.0,840.0,37
5,170,174,3,172.0,516.0,40


In [22]:
# Mean of grouped data: sum of fi * xi divided by the total frequency.
dados['fi.xi'].sum()/dados['fi'].sum()

160.7

In [23]:
# Mode of grouped data: midpoint of the class with the highest frequency.
# idxmax() gives the label of the first row holding the maximum fi.
dados.loc[dados['fi'].idxmax(), 'xi']

160.0

In [24]:
# Median of grouped data, by linear interpolation inside the median class:
#   median = lower_limit + ((n/2 - Fi_previous) * class_width) / fi_class
fi_2 = (dados['fi'].sum()/2) # half of the total frequency (n/2)

# Median class: the first class whose cumulative frequency Fi reaches n/2.
# argmax() on the boolean mask returns the 0-based position of the first True,
# which also works when the median falls in the very first class.
classe = int((dados['Fi'] >= fi_2).values.argmax())

limite_inf = dados['inferior'].iloc[classe]
# NOTE: despite the capital F, Fi_classe holds the *simple* frequency (column
# 'fi') of the median class; the name is kept so later cells keep working.
Fi_classe = dados['fi'].iloc[classe]
# Class width read from the table rather than hard-coded as 4.
amplitude = dados['superior'].iloc[classe] - limite_inf
# Cumulative frequency before the median class; zero when there is no previous class.
Fi_anterior = dados['Fi'].iloc[classe - 1] if classe > 0 else 0

mediana = limite_inf + ((fi_2 - Fi_anterior) * amplitude)/Fi_classe
mediana

160.1818181818182

## Média geométrica

In [25]:
from scipy.stats.mstats import gmean

In [27]:
# Geometric mean of the sample, computed by SciPy.
gmean(dataset)

160.26958390038902

## Média quadrática

In [28]:
def quadratic_mean(dados):
    """Return the quadratic mean (root mean square) of ``dados``.

    The values are squared, the squares are averaged, and the square root of
    that average is returned.

    Args:
        dados: A sized iterable of numbers (it must support ``len()``).

    Returns:
        The square root of the mean of the squared values, as a float.

    Raises:
        ZeroDivisionError: If ``dados`` is empty.
    """
    soma_quadrados = 0
    for valor in dados:
        soma_quadrados += valor * valor
    return math.sqrt(soma_quadrados / len(dados))

In [29]:
# Quadratic mean (root mean square) of the sample.
quadratic_mean(dataset)

160.48091786876097