In [3]:
import pandas 
import numpy as np
pandas.set_option('display.notebook_repr_html', False)

In [4]:
## crea el DataFrame
## los DataFrame en Pandas son diccionarios
df = pandas.DataFrame( {'index': list(range(1,6)),
                        'name': ['A', 'B', 'C', 'D', 'E'],
                        'value': [3.03, 5.14, 0.40, 1.13, 8.25]})
df

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

## Almacenamiento del archivo en disco


In [5]:
df.to_csv(
    "data.csv",   #el nombre del archivo
    index = False) #Imprime los nombres de las filas?

## Ejemplo de conversión de csv a json

In [9]:
pandas.read_csv("data.csv").to_json("data.json")

## Lectura y escritura de archivos usando Pandas

In [10]:
## crea el DataFrame
## los DataFrame en Pandas son diccionarios
df = pandas.DataFrame( {"index": list(range(1,6)),
                       "name": ["A", "B", "C", "D", "E"],
                       "value": [3.03, 5.14, 0.40, 1.13, 8.25]})

## Formato nativo de Python con Pandas

In [12]:
## pandas.DataFrame
df.to_pickle("data.pickle")

In [14]:
pandas.read_pickle("data.pickle")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

## Archivos delimitados por caracteres con Pandas

In [15]:
df.to_csv("data.csv",
         index=False)
print(open("data.csv", "r").read())

index,name,value
1,A,3.03
2,B,5.14
3,C,0.4
4,D,1.13
5,E,8.25



In [16]:
pandas.read_csv("data.csv")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

In [18]:
## opciones mas importantes
pandas.read_csv("data.csv",
               sep = ",",
               thousands = None,
               decimal = ".")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

#### En español: ";" como separador de campos y "," como separador decimal

In [21]:
## escribe el archivo
df.to_csv("data.csv2",
        sep = ";",
         decimal = ",",
         index = False
         )
print(open("data.csv2", "r").read())

index;name;value
1;A;3,03
2;B;5,14
3;C;0,4
4;D;1,13
5;E;8,25



In [23]:
pandas.read_csv('data.csv2',
                 sep = ';',
                 thousands = None,
                 decimal = ',')

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

## Formato de ancho fijo con Pandas

In [24]:
# crea un archivo separado por multiples espacios en blanco.
# Los nombres de las columnas están separados por comas.
text = """indexnames      valuescodes
    1john wick    2.13   10
    2mark twin    3.14   11
    3louis ng     4.34   12
    4dan brown    2.31   13
    5ann marie    4.98   14"""
with open("data.txt", "w") as f:
    f.write(text)

## verifica el archivo creado
print(open("data.txt", "r").read())

indexnames      valuescodes
    1john wick    2.13   10
    2mark twin    3.14   11
    3louis ng     4.34   12
    4dan brown    2.31   13
    5ann marie    4.98   14


In [25]:
pandas.read_fwf("data.txt",
               colspecs = "infer",
               widths = [5, 9, 8, 5])

   index      names  values  codes
0      1  john wick    2.13     10
1      2  mark twin    3.14     11
2      3   louis ng    4.34     12
3      4  dan brown    2.31     13
4      5  ann marie    4.98     14

## JSON con Pandas

In [26]:
# escribe el DataFrame al archivo

df.to_json("data.json")

# verifica el archivo creado
print(open("data.json", "r").read())

{"index":{"0":1,"1":2,"2":3,"3":4,"4":5},"name":{"0":"A","1":"B","2":"C","3":"D","4":"E"},"value":{"0":3.03,"1":5.14,"2":0.4,"3":1.13,"4":8.25}}


In [27]:
## Lo recupera
pandas.read_json("data.json")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

### HTML con Pandas

In [28]:
df.to_html("data.html",
          index = False)

## verifica el archivo creado
print(open("data.html",  "r").read())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>index</th>
      <th>name</th>
      <th>value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1</td>
      <td>A</td>
      <td>3.03</td>
    </tr>
    <tr>
      <td>2</td>
      <td>B</td>
      <td>5.14</td>
    </tr>
    <tr>
      <td>3</td>
      <td>C</td>
      <td>0.40</td>
    </tr>
    <tr>
      <td>4</td>
      <td>D</td>
      <td>1.13</td>
    </tr>
    <tr>
      <td>5</td>
      <td>E</td>
      <td>8.25</td>
    </tr>
  </tbody>
</table>


In [29]:
pandas.read_html("data.html")

[   index name  value
 0      1    A   3.03
 1      2    B   5.14
 2      3    C   0.40
 3      4    D   1.13
 4      5    E   8.25]

## Excel con Pandas

In [30]:
df.to_excel("data.xlsx",
           index = False)

In [31]:
pandas.read_excel("data.xlsx")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

## HDF5 con Pandas

In [32]:
# escribe el archivo a disco
df.to_hdf("data.h5",
         key="G1")

In [33]:
# lECTURA
pandas.read_hdf("data.h5",
               key = "G1")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

In [34]:
pandas.read_hdf("data.h5")

   index name  value
0      1    A   3.03
1      2    B   5.14
2      3    C   0.40
3      4    D   1.13
4      5    E   8.25

## STATA con PANDAS

In [35]:
df.to_stata("data.dta")
pandas.read_stata("data.dta")

   level_0  index name  value
0        0      1    A   3.03
1        1      2    B   5.14
2        2      3    C   0.40
3        3      4    D   1.13
4        4      5    E   8.25

# SQL con Pandas

In [None]:
pandas.DataFrame.to_sql(name = df, con = conn, ifexists = True)

In [36]:
pip install PyPDF2

Collecting PyPDF2
Note: you may need to restart the kernel to use updated packages.  Downloading PyPDF2-1.26.0.tar.gz (77 kB)
Building wheels for collected packages: PyPDF2
  Building wheel for PyPDF2 (setup.py): started
  Building wheel for PyPDF2 (setup.py): finished with status 'done'
  Created wheel for PyPDF2: filename=PyPDF2-1.26.0-py3-none-any.whl size=61087 sha256=0f027decd59d2959bce5c81a433d4d04da52c13e92ad5da4afd4458f80dbea0f

  Stored in directory: c:\users\ofaa\appdata\local\pip\cache\wheels\b1\1a\8f\a4c34be976825a2f7948d0fa40907598d69834f8ab5889de11
Successfully built PyPDF2
Installing collected packages: PyPDF2
Successfully installed PyPDF2-1.26.0


In [39]:
import PyPDF2
# pdfFileObj = open('data.pdf', 'rb')
# pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
# pageObj = pdfReader.getPage(0)
# pageObj.extractText()

## Lectura de Matlab y Octave

In [40]:
import scipy.io as sio
sio.savemat("data", {"df": df})
sio.loadmat("data")

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Fri Oct 30 11:16:58 2020',
 '__version__': '1.0',
 '__globals__': [],
 'df': array([[(array([[None]], dtype=object), array([[None]], dtype=object), array([[None]], dtype=object))]],
       dtype=[('index', 'O'), ('name', 'O'), ('value', 'O')])}

## Lectura de XBASE

In [41]:
# import dbfread
#
# for x in dbfread.DBF('files/data.dbf'):
#    print(x)