# Intro to Python
## Módulos

No python, os códigos externos são chamadas de **módulos**, para utilizá-los em seu código, utilize o comando **import**



In [81]:
import math

A = 16
B = math.sqrt(A)

print(B)

4.0


## Criando DataSets

Com o módulo **pandas** podemos criar estruturas de dados (similares à tabelas) chamadas *dataframes*.

In [82]:
import pandas as pd

meu_dataset = {
    'carros': ["BMW", "Ford", "Volvo"],
    'passageiros': [5,7,4]
}

df = pd.DataFrame(meu_dataset)
df


Unnamed: 0,carros,passageiros
0,BMW,5
1,Ford,7
2,Volvo,4


In [83]:
# Series no pandas são como colunas de tabelas (um array de uma dimensão)

a = [1,7,2]
print(a)

# Podemos ajustar os lables, que por padrão são inteiros e começam em zero.

a = pd.Series(a, index = ["x", "y", "z"])

print(a["y"])

# Podemos usar um objeto key/value, como um dicionário, quando criamos a Serie.

calorias = {"dia 1": 10, "dia 2": 20, "dia 3": 22}

# Para selecionar items específicos, use o argumento index

var = pd.Series(calorias, index = ["dia 1", "dia 3"])
print(var.mean())

[1, 7, 2]
7
16.0


## Importar Datasets

Para importar datasets utilizamos módulo **pandas**. Dataframes são estruturas de duas dimensões, como uma tabela com linhas e colunas.

In [84]:
import pandas as pd
import numpy as np

data = pd.read_csv('titanic3.csv')
data

# Usamos "loc" para localizar linhas específicas

data.loc[[5]]

# print(pd.options.display.max_rows) 


Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
5,1,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S,3,,"New York, NY"


## Analisando Dataframes

In [85]:
data.head(5)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [86]:
data.tail()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
1304,3,0,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C,,328.0,
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,,,
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C,,304.0,
1307,3,0,"Zakarian, Mr. Ortin",male,27.0,0,0,2670,7.225,,C,,,
1308,3,0,"Zimmerman, Mr. Leo",male,29.0,0,0,315082,7.875,,S,,,


In [87]:
data.info()

# Temos 1309 linhas, mas a coluna "fare", possui 1308 valores, sendo 1 nulo.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1046 non-null   float64
 5   sibsp      1309 non-null   int64  
 6   parch      1309 non-null   int64  
 7   ticket     1309 non-null   object 
 8   fare       1308 non-null   float64
 9   cabin      295 non-null    object 
 10  embarked   1307 non-null   object 
 11  boat       486 non-null    object 
 12  body       121 non-null    float64
 13  home.dest  745 non-null    object 
dtypes: float64(3), int64(4), object(7)
memory usage: 143.3+ KB


## Limpando Dados

Removendo linhas com valores em branco



In [88]:
#Selecionando apenas as 11 primeiras colunas 
df = data.iloc[:,:11]
df_novo = df.dropna()
print(df_novo.info())
df_novo

#Por padrão ele retorna um novo dataframe, se quiser alterar o atual use "inplace  = True"

# Para remover brancos de uma colunas use:df.dropna(subset=['Coluna'], inplace = True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 1231
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   pclass    270 non-null    int64  
 1   survived  270 non-null    int64  
 2   name      270 non-null    object 
 3   sex       270 non-null    object 
 4   age       270 non-null    float64
 5   sibsp     270 non-null    int64  
 6   parch     270 non-null    int64  
 7   ticket    270 non-null    object 
 8   fare      270 non-null    float64
 9   cabin     270 non-null    object 
 10  embarked  270 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 25.3+ KB
None


Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.00,0,0,24160,211.3375,B5,S
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.5500,C22 C26,S
2,1,0,"Allison, Miss. Helen Loraine",female,2.00,1,2,113781,151.5500,C22 C26,S
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.00,1,2,113781,151.5500,C22 C26,S
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.00,1,2,113781,151.5500,C22 C26,S
...,...,...,...,...,...,...,...,...,...,...,...
1188,3,1,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengt...",female,24.00,0,2,PP 9549,16.7000,G6,S
1189,3,1,"Sandstrom, Miss. Marguerite Rut",female,4.00,1,1,PP 9549,16.7000,G6,S
1217,3,0,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19.00,0,0,348124,7.6500,F G73,S
1230,3,0,"Strom, Miss. Telma Matilda",female,2.00,0,1,347054,10.4625,G6,S


## Substituindo o valores em branco

In [95]:
# Substituindo os valores em branco da coluna "fare" por com a média

media = data["fare"].mean()
print(media)
data["fare"].fillna(media, inplace = True)
data["fare"].info()


33.295479281345564
<class 'pandas.core.series.Series'>
RangeIndex: 1309 entries, 0 to 1308
Series name: fare
Non-Null Count  Dtype  
--------------  -----  
1309 non-null   float64
dtypes: float64(1)
memory usage: 10.4 KB


## Corrigindo Formato


In [102]:
data["ticket"] = pd.to_numeric(data["ticket"], "coerce")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pclass     1309 non-null   int64  
 1   survived   1309 non-null   int64  
 2   name       1309 non-null   object 
 3   sex        1309 non-null   object 
 4   age        1046 non-null   float64
 5   sibsp      1309 non-null   int64  
 6   parch      1309 non-null   int64  
 7   ticket     957 non-null    float64
 8   fare       1309 non-null   float64
 9   cabin      295 non-null    object 
 10  embarked   1307 non-null   object 
 11  boat       486 non-null    object 
 12  body       121 non-null    float64
 13  home.dest  745 non-null    object 
dtypes: float64(4), int64(4), object(6)
memory usage: 143.3+ KB


## Sobrescrevendo Valores

In [106]:
data.loc[0, "cabin"] = "B6"
data

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.00,0,0,24160.0,211.3375,B6,S,2,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781.0,151.5500,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.00,1,2,113781.0,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.00,1,2,113781.0,151.5500,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.00,1,2,113781.0,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3,0,"Zabour, Miss. Hileni",female,14.50,1,0,2665.0,14.4542,,C,,328.0,
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665.0,14.4542,,C,,,
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.50,0,0,2656.0,7.2250,,C,,304.0,
1307,3,0,"Zakarian, Mr. Ortin",male,27.00,0,0,2670.0,7.2250,,C,,,


In [113]:
data

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.00,0,0,24160.0,211.3375,B6,S,2,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781.0,151.5500,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.00,1,2,113781.0,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.00,1,2,113781.0,151.5500,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.00,1,2,113781.0,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3,0,"Zabour, Miss. Hileni",female,14.50,1,0,2665.0,14.4542,,C,,328.0,
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665.0,14.4542,,C,,,
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.50,0,0,2656.0,7.2250,,C,,304.0,
1307,3,0,"Zakarian, Mr. Ortin",male,27.00,0,0,2670.0,7.2250,,C,,,


In [116]:
for x in data.index:
    if df.loc[x, "fare"] < 1000:
        df.loc[x, "fare"] = 0.1

data.sort_values(by = ["fare"])


Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
363,2,0,"Campbell, Mr. William",male,,0,0,239853.0,0.0000,,S,,,Belfast
963,3,0,"Leonard, Mr. Lionel",male,36.0,0,0,,0.0000,,S,,,
125,1,0,"Fry, Mr. Richard",male,,0,0,112058.0,0.0000,B102,S,,,
384,2,0,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853.0,0.0000,,S,,,Belfast
898,3,0,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,,0.0000,,S,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,1,0,"Fortune, Mr. Charles Alexander",male,19.0,3,2,19950.0,263.0000,C23 C25 C27,S,,,"Winnipeg, MB"
50,1,1,"Cardeza, Mrs. James Warburton Martinez (Charlo...",female,58.0,0,1,,512.3292,B51 B53 B55,C,3,,"Germantown, Philadelphia, PA"
183,1,1,"Lesurer, Mr. Gustave J",male,35.0,0,0,,512.3292,B101,C,3,,
49,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36.0,0,1,,512.3292,B51 B53 B55,C,3,,"Austria-Hungary / Germantown, Philadelphia, PA"
