# TESTING GDOWN

This notebook tests downloading an Excel file from a Google Drive
link with `gdown` and verifies that the content of the downloaded
file is not corrupted.

In [1]:
# Installing GDown to download file straight
# from GDrive and openpyxl to read Excel files
!pip install gdown
!pip install openpyxl




In [2]:
# Necessary imports to utilize GDown
import pandas as pd
import gdown
import os


In [3]:
# Link to the Google Drive spreadsheet that holds the file we need
url = "https://docs.google.com/spreadsheets/d/14MiqEzjcEcIXA9gcdo2JHfIBi0QhtJWcyCtLQAFxPaI/"

# Extract the file ID: it is the path segment that immediately follows "/d/".
# Anchoring on "/d/" (instead of counting "/" from the end of the string)
# keeps this correct whether or not the link ends with a trailing "/" or
# carries a suffix such as "/edit#gid=0".
file_id = url.split('/d/', 1)[1].split('/', 1)[0]

# Download endpoint we need to hit, minus the file ID.
# NOTE(review): the "/" right after "?" looks like a typo (the usual form is
# "uc?export=download&id="); the recorded run succeeded with this exact
# string, so it is left unchanged here.
prefix = 'https://drive.google.com/uc?/export=download&id='

# Local name to give the downloaded file
excel_file = "dataset1.xlsx"

# Using gdown, download the file at prefix+file_id and save it under the
# name set above; the call's return value is shown as the cell output
gdown.download(prefix+file_id, excel_file)


Downloading...
From (uriginal): https://drive.google.com/uc?/export=download&id=14MiqEzjcEcIXA9gcdo2JHfIBi0QhtJWcyCtLQAFxPaI
From (redirected): https://docs.google.com/spreadsheets/d/14MiqEzjcEcIXA9gcdo2JHfIBi0QhtJWcyCtLQAFxPaI/export?format=xlsx
To: /home/jackzhong12/code/sabrinaauger/wino/notebooks/dataset1.xlsx
16.2MB [00:01, 14.9MB/s]


'dataset1.xlsx'

In [4]:
# Display the extracted file ID to confirm that the URL parsing in the
# download cell produced the expected value
file_id


'14MiqEzjcEcIXA9gcdo2JHfIBi0QhtJWcyCtLQAFxPaI'

In [5]:
# From there, read the downloaded Excel file into a DataFrame, and voilà!

df = pd.read_excel(excel_file)

# Preview the first rows to check that the content was read correctly
df.head()


Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


# Here's the reason why we cannot download a CSV file from Google Drive:

### https://stackoverflow.com/questions/65888541/error-reading-cvs-with-pandas-from-google-drive-url

# Testing the conversion of the file from Excel to CSV

In [6]:
# Name of the CSV file to write
csv_file = 'dataset1.csv'

# Write the DataFrame to a CSV file (index=False: do not write the row
# index as an extra column)
df.to_csv(csv_file, index=False)

# Remove the Excel file now that its content has been written to CSV.
# The existence check keeps this cell re-runnable: without it, running the
# cell a second time raises FileNotFoundError because the file is already gone.
if os.path.exists(excel_file):
    os.remove(excel_file)

print(f"Conversion from {excel_file} to {csv_file} completed successfully. \n Removing Excel file.")


Conversion from dataset1.xlsx to dataset1.csv completed successfully. 
 Removing Excel file.


In [7]:
# And now read the CSV file back for testing.
# Note: this rebinds `df`, replacing the DataFrame that was read from Excel.
df = pd.read_csv(csv_file)

# Preview the first rows of the CSV-backed DataFrame
df.head()


Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude
