In [1]:
#Imports generales
import time
import dateutil.parser as dateparser
from datetime import date,timedelta

import pandas as pd
import numpy as np
from sqlalchemy import create_engine, exc
from sqlalchemy import Table, Column, ForeignKey, MetaData, types
from sqlalchemy.sql import select,and_, or_, not_,func, desc
from psycopg2 import connect
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT


#Constantes
DB = "tp3"
HOST = "localhost"
PASS = "password"
USER = "postgres"
PORT = '5432'

# Diagrama DW - Vinoteca - Esquema copo de nieve

![Diagrama](img/Vinoteca.jpg "Diagrama")

#### Crear el "engine" pasando la dirección de la db

In [5]:
#Crear la BD para el tp3. 
con = None
con = connect(user=USER,host=HOST,password=PASS)

try:
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()
    cur.execute('CREATE DATABASE '+ DB)
    cur.close()
    con.close()
except Exception:
    print("La BD de datos ya se encuentra creada")


La BD de datos ya se encuentra creada


In [6]:
engine = create_engine('postgresql://'+USER+':'+PASS+'@'+HOST+':'+PORT+'/'+DB)
engine

Engine(postgresql://postgres:***@localhost:5432/tp3)

In [7]:
metadata = MetaData()

In [8]:
#TIME (TimeStamp, Date, Year,)
time = Table('time', metadata,
    Column('Id', types.Integer, primary_key=True),                     
    Column('TimeStamp', types.TIMESTAMP),         
    Column('Year', types.Integer, primary_key=False),
    Column('Date', types.Date, nullable=False),
 )

#CLASS (Code, Name, Region)
tipo_vino = Table('class', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Region', types.String(length=50)),
 )

#CUSTOMER (Code, Name, Address, Phone, BDay, Gender)
customer = Table('customer', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Address', types.String(length=50)),
    Column('Phone', types.String(length=50)),
    Column('BDay', types.String(length=50)),
    # Hombre = True, Mujer = False
    Column('Gender', types.Boolean),

 )

#WINE (Code, Name, Type, Vintage, BottlePrice, CasePrice, Class)
wine = Table('wine', metadata,
    Column('Code', types.Integer, primary_key=True),         
    Column('Name', types.String(length=50)),
    Column('Type', types.String(length=50)),
    Column('Vintage', types.Integer),
    Column('BottlePrice', types.Float),
    Column('CasePrice', types.Float),
    Column('Class',types.Integer, ForeignKey("class.Code")),                  
 )

#ORDER (Code,Customer, Wine, Time, nrBottles, nrCases)
orden = Table('order', metadata,
    Column('Code',types.Integer, primary_key=True),         
    Column('Id_customer',types.Integer, ForeignKey("customer.Code")),         
    Column('Id_wine',  types.Integer, ForeignKey("wine.Code") ),
    Column('Id_time',  types.Integer,ForeignKey("time.Id")),
    Column('nrBottles', types.Integer),
    Column('nrCases', types.Integer),          
 )

#VENTAS (ID_WINE,ID_ORDER,PRECIO_ORDEN)
ventas = Table('ventas', metadata,
    Column('Id_venta', types.Integer, primary_key=True),         
    Column('Id_order', types.Integer,ForeignKey("order.Code")),
    Column('Id_wine', types.Integer, ForeignKey("wine.Code")),
    Column('Precio_orden', types.Float)
 )



In [9]:
metadata.create_all(engine)

In [None]:
#Extraer el año del date para que coincida con campo Year
df_time = pd.read_csv("dataTp3/Time.csv")
df_time["Year"] = df_time["TimeStamp"].apply(lambda x: dateparser.parse(x).year)
df_time["Date"] = df_time["TimeStamp"].apply(lambda x: dateparser.parse(x).date())

try:    
    #Cargar dataframes en postgresql
    time = df_time.to_sql('time', engine, index=None,if_exists='append')
except exc.IntegrityError:
    print ("Los datos ya estan cargados")

In [None]:
try:
    df_customer= pd.read_csv("dataTp3/Customer.csv")
    customer = df_customer.to_sql('customer', engine, index=None,if_exists='append')
    df_class= pd.read_csv("dataTp3/Class.csv")
    classes = df_class.to_sql('class', engine, index=None,if_exists='append')
    df_wine= pd.read_csv("dataTp3/Wine.csv")
    wine = df_wine.to_sql('wine', engine, index=None,if_exists='append')
    df_order= pd.read_csv("dataTp3/Order.csv")
    order = df_order.to_sql('order', engine, index=None,if_exists='append')    
except exc.IntegrityError:
    print ("Los datos ya estan cargados")


In [None]:
df_ventas= pd.read_csv("dataTp3/Ventas.csv")
df_ventas["Precio_orden"] = np.arange(100,1100)

In [None]:
#Establece el id_wine que se encuentra  asociado a una orden.
def setear_id_vino(engine):
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    order=metadata.tables.get("order")
    consulta = select([order.c.Id_wine]).where(ventas.c.Id_order == order.c.Code )
    result = conn.execute(consulta).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    return df
    
temp = setear_id_vino(engine)
df_ventas["Id_wine"] = temp
ventas = df_ventas.to_sql('ventas', engine, index=None,if_exists='append')

In [11]:


def calcular_porcentaje_ventas(engine,anio):
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    orden=metadata.tables.get("order")
    wine=metadata.tables.get("wine")
    time=metadata.tables.get("time")
    clase=metadata.tables.get("class")
    
    
    
    consulta = select([clase.c.Code,clase.c.Name,func.count(clase.c.Code).label('Cantidad de ventas segun tipo')]).\
                            where( and_(ventas.c.Id_order == orden.c.Code,
                                                                    orden.c.Id_time == time.c.Id,
                                                                       time.c.Year == anio,
                                        ventas.c.Id_wine == wine.c.Code,
                                        wine.c.Class == clase.c.Code
                                       )).group_by(clase.c.Code,clase.c.Name)
        
    
    
    result = conn.execute(consulta).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    total_tipos = sum(df[2])
    df["Porcentaje"] = df[2].apply(lambda x: (x/total_tipos)*100)
    print(df)

calcular_porcentaje_ventas(engine,2005)
    


      0           1  2  Porcentaje
0   528      George  1    2.127660
1   812       Perez  1    2.127660
2   827        Cruz  1    2.127660
3   434   Carpenter  1    2.127660
4    72      Kelley  1    2.127660
5   504   Patterson  2    4.255319
6   858      Larson  1    2.127660
7   975     Morales  1    2.127660
8   710      Cooper  1    2.127660
9   170     Bradley  1    2.127660
10  424    Peterson  2    4.255319
11  438    Harrison  1    2.127660
12  327       Gomez  1    2.127660
13  280     Spencer  1    2.127660
14  328      Harris  1    2.127660
15  462        Ryan  1    2.127660
16  365      Miller  1    2.127660
17  222    Thompson  1    2.127660
18  645    Hamilton  1    2.127660
19  219       Olson  1    2.127660
20  470        Wood  1    2.127660
21   78        Hart  1    2.127660
22  646    Mcdonald  1    2.127660
23   81        Sims  1    2.127660
24  541    Williams  1    2.127660
25  311       Woods  1    2.127660
26  797    Anderson  2    4.255319
27  218     Russell 

In [13]:
#Temporada que los vinos de tipo X se venden mas?

def buscar_vino_en_temporada(nombre_tipo_vino="Anderson"):
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    orden=metadata.tables.get("order")
    wine=metadata.tables.get("wine")
    time=metadata.tables.get("time")
    clase=metadata.tables.get("class")
    
    id_tipo_vino = select([clase.c.Code]).where(and_(
                                                ventas.c.Id_wine == wine.c.Code,
                                                wine.c.Class == clase.c.Code,
                                                clase.c.Name == nombre_tipo_vino 
                                                 )).correlate(clase).limit(1) 
    
    #porque en la bd tenemos mas de un mismo tipo de vino como consecuencia del generador de datos
    
    consulta_temporadas = select([clase.c.Code,time.c.Year,func.count(clase.c.Code).label("quantity") ]).where( and_(ventas.c.Id_order == orden.c.Code,
                                                         orden.c.Id_time == time.c.Id,
                                                         orden.c.Id_wine == wine.c.Code, 
                                                         wine.c.Class == id_tipo_vino
                                                      )).group_by(clase.c.Code,time.c.Year).order_by(desc("quantity"))
    
    result = conn.execute(consulta_temporadas).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result)
    print (df)

buscar_vino_en_temporada()






     0     1  2
0  797  2005  2
1  797  2006  1
2  797  1996  1
3  797  2013  1


In [55]:
#Cliente que ha realizado mas compras

def buscar_compras_de_clientes():
    conn = engine.connect()
    metadata = MetaData()
    metadata.reflect(bind=conn)
    ventas=metadata.tables.get("ventas")
    orden=metadata.tables.get("order")
    customer=metadata.tables.get("customer")
    time=metadata.tables.get("time")
    
    fecha = date.today() - timedelta(days=365*4)

    consulta_temporadas = select([orden.c.Id_customer,func.count(orden.c.Id_customer).label('c1')]).where(and_(
                                                      ventas.c.Id_order == orden.c.Code,
                                                      orden.c.Id_time == time.c.Id,
                                                      time.c.Date >= fecha)).group_by(orden.c.Id_customer)
    
    result = conn.execute(consulta_temporadas).fetchall()
    #Se transforma en un data frame el resultado de la consulta.
    df = pd.DataFrame(result, columns=["Id_customer", "Quantity"])
    print (df)
    
buscar_compras_de_clientes()

     Id_customer  Quantity
0            887         1
1            707         1
2            601         1
3            297         1
4            318         1
5            855         1
6            959         1
7            693         1
8            294         1
9             28         1
10           919         1
11            15         1
12           140         1
13           407         1
14           200         1
15           299         1
16            40         1
17           437         1
18            53         1
19           180         1
20           371         1
21           726         1
22           814         1
23            58         1
24           137         1
25           709         1
26           185         1
27           547         1
28           405         1
29           553         1
..           ...       ...
86            37         1
87           173         1
88            81         1
89           792         1
90            12         1
9