## Pruebas con pandas: Series y DataFrame

In [2]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

### Series

In [4]:
# Series holding the multiples of 10 from 10 up to (but excluding) 100.
# No index is given, so pandas assigns the default 0..8 RangeIndex.
s1 = Series(data=np.arange(10, 100, step=10))
s1

0    10
1    20
2    30
3    40
4    50
5    60
6    70
7    80
8    90
dtype: int32

In [5]:
# Show the two components of the Series, one per line:
# the underlying NumPy array and the index object.
print(s1.values, s1.index, sep='\n')

[10 20 30 40 50 60 70 80 90]
RangeIndex(start=0, stop=9, step=1)


In [6]:
# Second Series with the same values and default index as s1,
# built independently so the two can be combined element-wise.
s2 = Series(data=np.arange(10, 100, step=10))
s2

0    10
1    20
2    30
3    40
4    50
5    60
6    70
7    80
8    90
dtype: int32

# Element-wise sum of the two Series; values are paired by index label.
r = s1.add(s2)
r

In [8]:
# Series with an explicit string index: labels 'a'..'e' map to 0..4.
s3 = Series([*range(5)], index=[*'abcde'])
s3

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [9]:
# Look up a single element by its index label.
s3.loc['b']

1

In [10]:
# Passing a list of labels returns a sub-Series with just those entries.
s3.loc[['b', 'd']]

b    1
d    3
dtype: int64

In [11]:
# First element by position. s3 is indexed by the strings 'a'..'e', so an
# integer passed to plain [] only works through a positional fallback that
# pandas deprecated (FutureWarning since 2.1; integer keys are slated to be
# treated as labels). .iloc is the explicit positional accessor.
s3.iloc[0]

0

In [14]:
# Boolean mask: keep only the values strictly between 30 and 80.
r.loc[r.gt(30) & r.lt(80)]

1    40
2    60
dtype: int32

In [15]:
# First three elements by position (the end of a positional slice is exclusive).
s3.iloc[:3]

a    0
b    1
c    2
dtype: int64

In [16]:
# Slice by label: unlike a positional slice, BOTH endpoints are included.
s3.loc['b':'d']

b    1
c    2
d    3
dtype: int64

In [18]:
# Apply the NumPy exponential to the whole Series; the result is a float
# Series that keeps the 'a'..'e' index.
r = s3.pipe(np.exp)
r

a     1.000000
b     2.718282
c     7.389056
d    20.085537
e    54.598150
dtype: float64

In [20]:
# Two Series whose indexes only share the labels 'a' and 'e'.
s3 = Series([*range(5)], index=[*'abcde'])
s4 = Series([*range(5)], index=[*'aeiou'])
# The sum aligns on labels: a label present in only one operand yields NaN,
# which is why the result is float.
r = s3.add(s4)
r


a    0.0
b    NaN
c    NaN
d    NaN
e    5.0
i    NaN
o    NaN
u    NaN
dtype: float64

In [21]:
# Boolean mask marking which entries of r are missing (NaN).
r.isna()

a    False
b     True
c     True
d     True
e    False
i     True
o     True
u     True
dtype: bool

### DataFrame

In [22]:
# 5x4 DataFrame holding 0..19 row by row; with no labels given, both the
# rows and the columns get default integer labels.
dt = DataFrame(data=np.arange(20).reshape((5, 4)))
dt

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19


In [23]:
# Same 5x4 grid of 0..19, now with the columns named 'a'..'d'.
dt = DataFrame(data=np.arange(20).reshape((5, 4)), columns=[*'abcd'])
dt

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19


In [24]:
# Select one column by name; the result is a Series named after the column.
dt.loc[:, 'a']

0     0
1     4
2     8
3    12
4    16
Name: a, dtype: int32

In [25]:
# Single cell: row label 0, column 'a'. One .loc[row, column] call replaces
# the chained dt['a'][0], which first builds the intermediate column Series
# and then indexes it with a second, separate lookup.
dt.loc[0, 'a']

0

In [26]:
# Select the row labelled 1 across all columns; it comes back as a Series
# indexed by the column names.
dt.loc[1, :]

a    4
b    5
c    6
d    7
Name: 1, dtype: int32

In [27]:
# Single cell: row label 1, column 'b'. One .loc[row, column] call avoids
# materialising the whole row as an intermediate Series, which the chained
# dt.loc[1]['b'] did before picking 'b' out of it.
dt.loc[1, 'b']

5

In [28]:
# A list of column names selects several columns and returns a DataFrame.
dt.loc[:, ['a', 'c']]

Unnamed: 0,a,c
0,0,2
1,4,6
2,8,10
3,12,14
4,16,18


In [29]:
# Column 'b' as a Series (bracket form of the attribute access dt.b).
dt['b']

0     1
1     5
2     9
3    13
4    17
Name: b, dtype: int32

In [30]:
# Convert the frame's values into a plain list of lists, one inner list per row.
L = dt.to_numpy().tolist()
L

[[0, 1, 2, 3],
 [4, 5, 6, 7],
 [8, 9, 10, 11],
 [12, 13, 14, 15],
 [16, 17, 18, 19]]

### Carga de ficheros con pandas

In [32]:
# Load the semicolon-delimited orders file (path is relative to the notebook's
# working directory) and preview its first five rows.
# Note: this rebinds `dt`, replacing the small demo frame built earlier.
dt = pd.read_csv(filepath_or_buffer='ficheros_templates/Pedidos.txt', delimiter=';')
dt.head(5)

Unnamed: 0,idpedido,cliente,idempleado,idempresa,importe,pais
0,10248,WILMK,5,3,32.38,Finlandia
1,10249,TOMSP,6,1,11.61,Alemania
2,10250,HANAR,4,2,65.83,Brasil
3,10251,VICTE,3,1,41.34,Francia
4,10252,SUPRD,4,2,51.3,Belgica


In [33]:
# Preview the last ten rows of the orders table.
dt.iloc[-10:]

Unnamed: 0,idpedido,cliente,idempleado,idempresa,importe,pais
820,11068,QUEEN,8,2,81.75,Brasil
821,11069,TORTU,1,2,15.67,Mexico
822,11070,LEHMS,2,1,136.0,Alemania
823,11071,LILAS,1,1,0.93,Venezuela
824,11072,ERNSH,4,2,258.64,Austria
825,11073,PERIC,2,2,24.95,Mexico
826,11074,SIMOB,7,2,18.44,Dinamarca
827,11075,RICSU,8,2,6.19,Suiza
828,11076,BONAP,4,2,38.28,Francia
829,11077,RATTC,1,2,8.53,Estados Unidos


In [34]:
# Print a structural summary of the frame: index range, column names,
# non-null count and dtype per column, and memory usage.
dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 830 entries, 0 to 829
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   idpedido    830 non-null    int64  
 1   cliente     830 non-null    object 
 2   idempleado  830 non-null    int64  
 3   idempresa   830 non-null    int64  
 4   importe     830 non-null    float64
 5   pais        830 non-null    object 
dtypes: float64(1), int64(3), object(2)
memory usage: 39.0+ KB


In [35]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text columns (cliente, pais) are left out of the result.
dt.describe()

Unnamed: 0,idpedido,idempleado,idempresa,importe
count,830.0,830.0,830.0,830.0
mean,10662.5,4.403614,2.008434,78.244205
std,239.744656,2.499648,0.778899,116.779294
min,10248.0,1.0,1.0,0.02
25%,10455.25,2.0,1.0,13.38
50%,10662.5,4.0,2.0,41.36
75%,10869.75,7.0,3.0,91.43
max,11077.0,9.0,3.0,1007.64


In [36]:
# Boolean-mask filter: keep only the orders whose country is 'Finlandia'.
dt.loc[dt['pais'].eq('Finlandia')]

Unnamed: 0,idpedido,cliente,idempleado,idempresa,importe,pais
0,10248,WILMK,5,3,32.38,Finlandia
18,10266,WARTH,3,3,25.73,Finlandia
22,10270,WARTH,1,1,136.54,Finlandia
72,10320,WARTH,5,3,34.57,Finlandia
85,10333,WARTH,5,3,0.59,Finlandia
164,10412,WARTH,8,2,3.77,Finlandia
168,10416,WARTH,8,3,22.72,Finlandia
189,10437,WARTH,8,1,19.97,Finlandia
207,10455,WARTH,8,2,180.45,Finlandia
278,10526,WARTH,4,2,58.59,Finlandia


In [38]:
# Orders from 'Finlandia' whose amount lies in the closed range [20.0, 50.0]
# (between() includes both endpoints by default).
dtFin = dt.loc[
    dt['pais'].eq('Finlandia')
    & dt['importe'].between(20.0, 50.0)
]
dtFin

Unnamed: 0,idpedido,cliente,idempleado,idempresa,importe,pais
0,10248,WILMK,5,3,32.38,Finlandia
18,10266,WARTH,3,3,25.73,Finlandia
72,10320,WARTH,5,3,34.57,Finlandia
168,10416,WARTH,8,3,22.72,Finlandia
425,10673,WILMK,2,1,22.76,Finlandia
662,10910,WILMK,1,3,38.11,Finlandia
777,11025,WARTH,6,3,29.17,Finlandia


In [40]:
# Export the filtered orders as a semicolon-delimited file without the row
# index. Floats are written with a comma as decimal separator, so reading the
# file back with read_csv needs decimal=',' as well.
dtFin.to_csv(
    path_or_buf='ficheros_templates/pedidos_finlandia.csv',
    sep=';',
    decimal=',',
    index=False,
)

In [41]:
# Distinct country names, in order of first appearance, as a NumPy array.
# Note: this rebinds `L`, which previously held the list-of-lists conversion.
L = dt['pais'].unique()
L

array(['Finlandia', 'Alemania', 'Brasil', 'Francia', 'Belgica', 'Suiza',
       'Venezuela', 'Austria', 'Mexico', 'Estados Unidos', 'Suecia',
       'Italia', 'Espanya', 'Reino Unido', 'Irlanda', 'Portugal',
       'Canada', 'Dinamarca', 'Polonia', 'Noruega', 'Argentina'],
      dtype=object)