In [None]:
#Imports generales
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import MetaData, types 
from sqlalchemy import Table, Column, ForeignKey
from psycopg2 import connect
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

#Constantes
DB = "tp3"
HOST = "localhost"
PASS = "password"
USER = "postgres"

#### Crear el "engine" pasando la dirección de la db

In [None]:
#Crear la BD para el tp3. 
con = None
con = connect(user=USER,host=HOST,password=PASS)

con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute('CREATE DATABASE '+ DB)
cur.close()
con.close()


In [None]:
engine = create_engine('postgresql://postgres:password@localhost:5432/'+DB)
engine

In [None]:
metadata = MetaData()

In [None]:
#TIME (TimeStamp, Date, Year,)
time = Table('time', metadata,
    Column('Id', types.Integer, primary_key=True),                     
    Column('TimeStamp', types.TIMESTAMP),         
    Column('Year', types.Integer, primary_key=False),
    Column('Date', types.Date, nullable=False),
 )

#CLASS (Code, Name, Region)
tipo_vino = Table('class', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Region', types.String(length=50)),
 )

#CUSTOMER (Code, Name, Address, Phone, BDay, Gender)
customer = Table('customer', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Address', types.String(length=50)),
    Column('Phone', types.String(length=50)),
    Column('BDay', types.String(length=50)),
    # Hombre = True, Mujer = False
    Column('Gender', types.Boolean),

 )


#WINE (Code, Name, Type, Vintage, BottlePrice, CasePrice, Class)

wine = Table('wine', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Type', types.String(length=50)),
    Column('Vintage', types.Integer),
    Column('BottlePrice', types.Float),
    Column('CasePrice', types.Float),
    Column('Class',types.Integer, ForeignKey("class.Code")),                  
 )

#ORDER (Code,Customer, Wine, Time, nrBottles, nrCases)
orden = Table('order', metadata,
    Column('Code',types.Integer, primary_key=True),         
    Column('Id_customer',types.Integer, ForeignKey("customer.Code")),         
    Column('Id_wine',  types.Integer, ForeignKey("wine.Code") ),
    Column('Id_time',  types.Integer,ForeignKey("time.Id")),
    Column('nrBottles', types.Integer),
    Column('nrCases', types.Integer),          
 )

#VENTAS (ID_WINE,ID_ORDER,PRECIO_ORDEN)
ventas = Table('ventas', metadata,
    Column('Id_venta', types.Integer, primary_key=True),         
    Column('Id_order', types.Integer,ForeignKey("order.Code")),
    Column('Id_wine', types.Integer, ForeignKey("wine.Code")),
    Column('Precio_orden', types.Float)
 )



In [None]:
metadata.create_all(engine)

In [None]:
#Cargar datos de prueba
import numpy as np
import dateutil.parser as dateparser

#Extraer el año del date para que coincida con campo Year
df_time = pd.read_csv("dataTp3/Time.csv")
df_time["Year"] = df_time["TimeStamp"].apply(lambda x: dateparser.parse(x).year)
df_time["Date"] = df_time["TimeStamp"].apply(lambda x: dateparser.parse(x).date())
#Cargar dataframes en postgresql
time = df_time.to_sql('time', engine, index=None,if_exists='append')
#df_time

In [None]:
df_customer= pd.read_csv("dataTp3/Customer.csv")
customer = df_customer.to_sql('customer', engine, index=None,if_exists='append')
df_class= pd.read_csv("dataTp3/Class.csv")
classes = df_class.to_sql('class', engine, index=None,if_exists='append')
df_wine= pd.read_csv("dataTp3/Wine.csv")
wine = df_wine.to_sql('wine', engine, index=None,if_exists='append')
df_order= pd.read_csv("dataTp3/Order.csv")
order = df_order.to_sql('order', engine, index=None,if_exists='append')


In [None]:
df_ventas= pd.read_csv("dataTp3/Ventas.csv")
df_ventas["Precio_orden"] = np.arange(100,1100)

In [None]:
from sqlalchemy.sql import select,and_
def setear_id_vino(engine):
    """
        Establece el id_wine que se encuentra 
        asociado a una orden.
    :param engine:
    :return: DataFrama nx1
    """
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    order=metadata.tables.get("order")
    consulta = select([order.c.Id_wine]).where(ventas.c.Id_order == order.c.Code )
    result = conn.execute(consulta).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    return df
    
temp = setear_id_vino(engine)
df_ventas["Id_wine"] = temp
ventas = df_ventas.to_sql('ventas', engine, index=None,if_exists='append')

In [89]:
#Calcular porcentajes de tipos de vinos mas vendidos
#
#FALTA TERMINAR!!!
#
from sqlalchemy.sql import select,and_, or_, not_
from sqlalchemy import func, desc

#select([users, addresses]).where(users.c.id == addresses.c.user_id)
#select.group_by()
def calcular_porcentaje_ventas(engine,anio):
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    orden=metadata.tables.get("order")
    wine=metadata.tables.get("wine")
    time=metadata.tables.get("time")
    clase=metadata.tables.get("class")
    
    
    
    consulta = select([clase.c.Code,clase.c.Name,func.count(clase.c.Code).label('Cantidad de ventas segun tipo')]).\
                            where( and_(ventas.c.Id_order == orden.c.Code,
                                                                    orden.c.Id_time == time.c.Id,
                                                                       time.c.Year == anio,
                                        ventas.c.Id_wine == wine.c.Code,
                                        wine.c.Class == clase.c.Code
                                       )).group_by(clase.c.Code,clase.c.Name)
        
    
    
    result = conn.execute(consulta).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    #print (df)
    total_tipos = sum(df[2])
    df["Porcentaje"] = df[2].apply(lambda x: (x/total_tipos)*100)
    
    print(df)
    #for row in result:
    #    print(row)

calcular_porcentaje_ventas(engine,2005)
    


      0           1  2  Porcentaje
0   528      George  1    2.127660
1   812       Perez  1    2.127660
2   827        Cruz  1    2.127660
3   434   Carpenter  1    2.127660
4    72      Kelley  1    2.127660
5   504   Patterson  2    4.255319
6   858      Larson  1    2.127660
7   975     Morales  1    2.127660
8   710      Cooper  1    2.127660
9   170     Bradley  1    2.127660
10  424    Peterson  2    4.255319
11  438    Harrison  1    2.127660
12  327       Gomez  1    2.127660
13  280     Spencer  1    2.127660
14  328      Harris  1    2.127660
15  462        Ryan  1    2.127660
16  365      Miller  1    2.127660
17  222    Thompson  1    2.127660
18  645    Hamilton  1    2.127660
19  219       Olson  1    2.127660
20  470        Wood  1    2.127660
21   78        Hart  1    2.127660
22  646    Mcdonald  1    2.127660
23   81        Sims  1    2.127660
24  541    Williams  1    2.127660
25  311       Woods  1    2.127660
26  797    Anderson  2    4.255319
27  218     Russell 

In [97]:
#Temporada que los vinos de tipo X se venden mas?

def buscar_vino_en_temporada(nombre_tipo_vino="Anderson"):
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    orden=metadata.tables.get("order")
    wine=metadata.tables.get("wine")
    time=metadata.tables.get("time")
    clase=metadata.tables.get("class")
    
    id_tipo_vino = select([clase.c.Code]).where(and_(
                                                ventas.c.Id_wine == wine.c.Code,
                                                wine.c.Class == clase.c.Code,
                                                clase.c.Name == nombre_tipo_vino 
                                                 )).correlate(clase).first()
        
    
    consulta_temporadas = select([clase.c.Code,time.c.Year,func.count(clase.c.Code) ]).where( and_(ventas.c.Id_order == orden.c.Code,
                                                         orden.c.Id_time == time.c.Id,
                                                         orden.c.Id_wine == wine.c.Code, 
                                                         wine.c.Class == id_tipo_vino
                                                      )).group_by(clase.c.Code,time.c.Year)
    
    result = conn.execute(consulta_temporadas).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    print (df)

buscar_vino_en_temporada()


#Cliente que ha realizado mas compras



TimeoutError: QueuePool limit of size 5 overflow 10 reached, connection timed out, timeout 30

In [None]:
print "asasaaaaa"

In [None]:
df_class


#### Hacer la query especificando el "engine" que se desea usar

In [None]:
df_customer = pd.read_sql_query('select * from "Customer"',con=engine)

In [None]:
df_customer.to_json('/tmp/test.json')

In [None]:
json_df  = pd.read_json('/tmp/test.json')
json_df

In [None]:
json_df['Birthday'] = pd.to_datetime(json_df['Birthday'], unit='ns')
json_df

In [None]:
df.info()

In [None]:
df.describe()

Link a [Pandas NB](Starting%20with%20pandas.ipynb) para ver join, merge, append, etc

#### Agregando un nuevo registro a nuestra tabla "Customer" con pandas

In [None]:
new_df = pd.DataFrame([[4, pd.datetime(2011, 1, 10), 'F']], columns=df.columns)
new_df

In [None]:
new_df.to_sql('Customer', engine, if_exists='append', index=None)

#### Agregando una nueva tabla a nuestra db desde pandas

In [None]:
new_table = pd.DataFrame([], columns=['WineCode', 'Type', 'Vintage'])
new_table

In [None]:
new_table.to_sql('Wine', engine, index=None)

#### Ahora hagamos lo mismo con sqlalchemy

In [None]:
from sqlalchemy import MetaData, types
from sqlalchemy import Table, Column

In [None]:
metadata = MetaData()

In [None]:
time = Table('Time', metadata,
    Column('TimeCode', types.Integer, primary_key=True),
    Column('Date', types.DateTime, nullable=False),
 )

In [None]:
metadata.create_all(engine)

### Agregando elementos a Wine con pandas

In [None]:
data = [[1, 'White', 2000],
        [2, 'red', 2015],
        [3, 'rose', 2014]]

In [None]:
new_df = pd.DataFrame(data, columns=df_wine.columns)

In [None]:
new_df.to_sql('Wine', engine, if_exists='append', index=None)

In [None]:
df_wine = pd.read_sql_query('select * from "Wine"',con=engine)
df_wine

In [None]:
data = [[1, 'White', pd.datetime(2000, 10, 10)],
        [2, 'red', pd.datetime(2010, 9, 9)],
        [3, 'rose', pd.datetime(2011, 9, 9)]]

In [None]:
new_df = pd.DataFrame(data, columns=df_wine.columns)
new_df['Vintage']

In [None]:
new_df.to_json('/tmp/lero.json', date_unit='ns')

In [None]:
json_demo = pd.read_json('/tmp/lero.json')
jso

In [None]:
new_df

In [None]:
json_demo['Vintage'] = pd.to_datetime(json_demo['Vintage'], unit='ns')

In [None]:
json_demo

In [None]:
new_df

In [None]:
json_demo.columns.values

In [None]:
json_demo.values

In [None]:
pd.merge(json_demo, new_df, on=list(json_demo.columns.values), how='outer')

In [None]:
pd.Series?

In [None]:
score = pd.Series([10, 9, 8], name='score')
score

In [None]:
out = pd.concat([json_demo, score], axis=1)
new_row = pd.DataFrame([[4, 'espumeante',pd.datetime(2000,2,2)]], columns=new_df.columns)

In [None]:
append_df = new_df.append(new_row)

In [None]:
append_df.to_sql('Wine', engine, if_exists='append', index=None)