In [112]:
import pandas as pd
import numpy as np

In [113]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)


TypeError: 'list' object is not callable

## How to convert the index of a series into a column of a dataframe?

In [None]:
df = ser.to_frame().reset_index()
print(df.head())

## How to combine many series to form a dataframe?

In [None]:
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [None]:
df = pd.concat([ser1, ser2], axis=1)
print(df.head())

## How to assign name to the series’ index?

In [None]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser.name = 'alphabets'
ser.head()

## How to get the items of series A not present in series B?

In [114]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
ser1.name = 'ser1'
# isin() valida se o valor está presente na série, e o ~ inverte o resultado
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
Name: ser1, dtype: int64

## How to get the items not common to both series A and series B?

In [115]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [116]:
ser_u = pd.Series(np.union1d(ser1, ser2))  # union / unique no repeat
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # intersect / common
print(ser_i)

0    4
1    5
dtype: int64


In [117]:
ser_u[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [118]:
~ser_u.isin(ser_i)

0     True
1     True
2     True
3    False
4    False
5     True
6     True
7     True
dtype: bool

In [119]:
ser_u.isin(ser_i)

0    False
1    False
2    False
3     True
4     True
5    False
6    False
7    False
dtype: bool

## How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [120]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [121]:
print(ser)

0      4.934647
1     17.749790
2     20.101350
3      4.082385
4     11.297057
5      0.404312
6     18.280546
7     15.774667
8     11.217399
9      6.392146
10     7.005418
11     7.507698
12    10.353014
13     9.385542
14    10.179398
15    10.952471
16    12.735673
17    13.906079
18    10.715019
19     2.699908
20    14.784545
21     4.996429
22    10.714498
23     5.338102
24    13.561890
dtype: float64


In [122]:
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 0.40431198,  6.39214626, 10.71449766, 13.56189042, 20.10134976])

In [123]:
ser.describe()

count    25.000000
mean     10.202799
std       5.032974
min       0.404312
25%       6.392146
50%      10.714498
75%      13.561890
max      20.101350
dtype: float64

## How to get frequency counts of unique items of a series?

In [124]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

TypeError: 'list' object is not callable

In [None]:
ser.value_counts()

# How to keep only top 2 most frequent values as it is and replace everything else as Other?

In [None]:
np.random.RandomState(100)
#cria uma Series chamada ser que contém 12 números inteiros aleatórios entre 1 e 4.
ser = pd.Series(np.random.randint(1, 5, [12]))

In [None]:
# ser.value_counts().index[:2] retorna os dois índices com as maiores contagens.
# ~ser.isin(ser.value_counts().index[:2]) retorna uma Series booleana que é True para os valores em ser que não estão nos dois índices com as maiores contagens
# ser[~ser.isin(ser.value_counts().index[:2])] = 'Other' substitui todos os valores em ser que não estão nos dois índices com as maiores contagens por 'Other'.

In [None]:
print("Top 2 freq:", ser.value_counts())
ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'
ser

# How to bin a numeric series to 10 groups of equal size?

In [None]:
ser = pd.Series(np.random.random(20))
print(ser.head())

In [None]:
pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1], labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th'])

# How to convert a numpy array to a dataframe of given shape?

In [None]:
ser = pd.Series(np.random.randint(1, 10, 35))
print(ser.head())

In [None]:
teste = ser.values.reshape(7, 5)
print(teste)

In [None]:
print(teste[1])

In [None]:
df = pd.DataFrame(ser.values.reshape(7, 5))

In [None]:
df

# How to find the positions of numbers that are multiples of 3 from a series?

In [None]:
# cria uma Series chamada ser com 7 valores aleatórios entre 1 e 10.
ser = pd.Series(np.random.randint(1, 10, 7))
ser.head(7)

In [None]:
np.argwhere(ser % 3==0) #retorna os índices dos valores em ser que são múltiplos de 3.

# How to extract items at given positions from a series

In [None]:
# # ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
# # ser = pd.Series(np.array(list('abcdefghijklmnopqrstuvwxyz')))
# ser = pd.Series(np.array(list('abcdefghijklmnopqrstuvwxyz')))
# pos = [0, 4, 8, 14, 20]
# # pos

In [None]:
# ser = pd.Series(np.array(list('abcdefghijklmnopqrstuvwxyz')))
# pos = np.array(pos)
# 
# print(ser[pos])


# How to stack two series vertically and horizontally ?

In [129]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(['a', 'b', 'c', 'd', 'e'])

In [133]:
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


# How to get the positions of items of series A in another series B?

In [134]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

In [135]:
[np.where(i == ser1)[0].tolist()[0] for i in ser2]

[5, 4, 0, 8]

In [136]:
[pd.Index(ser1).get_loc(i) for i in ser2]

[5, 4, 0, 8]

In [140]:
for i in ser2:
    print(i)

1
3
10
13


In [142]:
pd.Index(ser1).get_loc(9)

1

# How to compute the mean squared error on a truth and predicted series?

In [144]:
truth = pd.Series(range(10))
pred = pd.Series(range(10)) + np.random.random(10) # cria uma Series com 10 valores, começando em 0 e terminando em 9. A cada valor é adicionado um valor aleatório entre 0 e 1.

In [147]:
np.mean((truth-pred)**2)

0.32205093010380537

In [150]:
truth - pred

0   -0.393563
1   -0.499802
2   -0.637821
3   -0.215884
4   -0.232284
5   -0.895013
6   -0.623694
7   -0.670336
8   -0.815210
9   -0.066914
dtype: float64

In [149]:
(truth - pred)**2

0    0.154892
1    0.249802
2    0.406815
3    0.046606
4    0.053956
5    0.801049
6    0.388995
7    0.449351
8    0.664567
9    0.004478
dtype: float64

# How to convert the first character of each element in a series to uppercase?

In [151]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

In [152]:
ser.map(lambda x: x.title())

0     How
1      To
2    Kick
3    Ass?
dtype: object

In [154]:
ser.map(lambda x: x[0].upper() + x[1:])

0     How
1      To
2    Kick
3    Ass?
dtype: object

In [155]:
pd.Series([i.title() for i in ser])

0     How
1      To
2    Kick
3    Ass?
dtype: object

# How to