In [1]:
from datetime import datetime
import requests
from flask import Flask, jsonify, Response, request
import csv
import io
import unicodedata
from collections import OrderedDict

In [2]:
current_year = datetime.now().year

BASE_URL = "http://vitibrasil.cnpuv.embrapa.br/download/"

In [3]:

def get_csv_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.content, None
    except requests.RequestException as e:
        return None, f"Error fetching CSV: {str(e)}"

def remove_special_chars(text):
    normalized = unicodedata.normalize('NFKD', text)
    return ''.join(c for c in normalized if not unicodedata.combining(c))

def replace_special_chars(text):
    replacements = {
        'ç': 'c', 'á': 'a', 'à': 'a', 'ã': 'a', 'â': 'a',
        'é': 'e', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ô': 'o',
        'õ': 'o', 'ú': 'u', 'ü': 'u'
    }
    return ''.join(replacements.get(c.lower(), c) for c in text)

In [21]:
def get_comercio():
    url = f"{BASE_URL}Comercio.csv"
    csv_content = get_csv_content(url)
    
    # csv_file = io.StringIO(csv_content.decode('utf-8'))
    csv_reader = csv.reader(csv_content, delimiter=';')
    headers = next(csv_reader)
    data = []
    for row in csv_reader:
        formatted_row = OrderedDict()
        formatted_row['id'] = row[0]
        formatted_row['control'] = replace_special_chars(remove_special_chars(row[1]))
        formatted_row['product'] = replace_special_chars(remove_special_chars(row[2]))
        formatted_row["measure_type"] = 'liter'

    return jsonify(data)

def get_producao():
    url = f"{BASE_URL}Producao.csv"
    csv_content = get_csv_content(url)
    
    csv_file = io.StringIO(csv_content.decode('utf-8'))
    csv_reader = csv.reader(csv_file, delimiter=';')
    headers = next(csv_reader)
    data = []
    for row in csv_reader:
        formatted_row = OrderedDict()
        formatted_row['id'] = row[0]
        formatted_row['control'] = replace_special_chars(remove_special_chars(row[1]))
        formatted_row['product'] = replace_special_chars(remove_special_chars(row[2]))
        formatted_row["measure_type"] = 'liter'

    return jsonify(data)

def get_processamento(csv_type):
    url = f"{BASE_URL}{csv_type}.csv"
    csv_content = get_csv_content(url)

    delimiter = '\t' if csv_type in ['ProcessaAmericanas', 'ProcessaMesa', 'ProcessaSemclass'] else ';'

    csv_file = io.StringIO(csv_content.decode('utf-8'))
    csv_reader = csv.reader(csv_file, delimiter=delimiter)
    headers = next(csv_reader)
    data = []
    for row in csv_reader:
        formatted_row = OrderedDict()
        formatted_row['id'] = row[0]
        formatted_row['control'] = replace_special_chars(remove_special_chars(row[1]))
        formatted_row['cultivar'] = replace_special_chars(remove_special_chars(row[2]))

        if csv_type.lower().startswith('processa'):
            formatted_row["measure_type"] = 'kg'
        else:
            formatted_row["measure_type"] = 'liter'
        
    return jsonify(data)

In [23]:
url = f"{BASE_URL}Comercio.csv"
csv_content = get_csv_content(url)


In [30]:
csv_content

'id;control;Produto;1970;1971;1972;1973;1974;1975;1976;1977;1978;1979;1980;1981;1982;1983;1984;1985;1986;1987;1988;1989;1990;1991;1992;1993;1994;1995;1996;1997;1998;1999;2000;2001;2002;2003;2004;2005;2006;2007;2008;2009;2010;2011;2012;2013;2014;2015;2016;2017;2018;2019;2020;2021;2022;2023\n1;VINHO DE MESA;VINHO DE MESA;98327606;114399031;118377367;116617910;94173324;108031792;139238614;140813114;141293379;149609112;122825298;128894580;166861772;195616620;171619507;185191837;203130018;131065191;150678647;172921267;164725646;190134895;180230431;201168480;180295366;146583828;165831436;174768638;181576649;200578746;221023603;221518224;227447392;217082959;225021830;271248493;245625614;226710045;200488612;234525979;221242945;230310468;206969571;221590810;206404427;209198468;166769622;176059959;177186273;180446489;215557931;210012238;187939996;187016848\n2;vm_Tinto;  Tinto;83300735;98522869;101167932;98196747;77167303;91528090;116407222;116609545;117203914;119496652;99646124;99151812;13195789

In [29]:
csv_file = io.StringIO(csv_content.decode('utf-8'))

AttributeError: 'str' object has no attribute 'decode'

In [26]:
csv_reader = csv.reader(csv_content, delimiter=';')

In [27]:
csv_reader

<_csv.reader at 0x28e0b260b20>

In [32]:
import requests
import csv
import json
from collections import OrderedDict
from flask import jsonify  # if using Flask for returning JSON

# Assume replace_special_chars and remove_special_chars are defined

def get_csv_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.content.decode('utf-8'), None  # Decode to string
    except requests.RequestException as e:
        return None, f"Error fetching CSV: {str(e)}"

def get_comercio():
    url = f"{BASE_URL}Comercio.csv"
    csv_content, error = get_csv_content(url)
    if error:
        return jsonify({"error": error})
    
    # Create CSV reader
    csv_file = io.StringIO(csv_content)
    csv_reader = csv.reader(csv_file, delimiter=';')
    
    # Extract headers
    headers = next(csv_reader)
    
    # Parse and clean data
    data = []
    for row in csv_reader:
        formatted_row = OrderedDict()
        formatted_row['id'] = row[0]
        formatted_row['control'] = replace_special_chars(remove_special_chars(row[1]))
        formatted_row['product'] = replace_special_chars(remove_special_chars(row[2]))
        formatted_row['measure_type'] = 'liter'  # As specified
        
        # Collect remaining columns as "yearly_data"
        yearly_data = OrderedDict()
        for year, value in zip(headers[3:], row[3:]):  # From the year columns onwards
            yearly_data[year] = value
        
        formatted_row['yearly_data'] = yearly_data
        data.append(formatted_row)
    
    return jsonify(data)  # If using Flask, otherwise return json.dumps(data)

# Example test
url = f"{BASE_URL}Comercio.csv"
csv_content = get_csv_content(url)


In [33]:
csv_content

('id;control;Produto;1970;1971;1972;1973;1974;1975;1976;1977;1978;1979;1980;1981;1982;1983;1984;1985;1986;1987;1988;1989;1990;1991;1992;1993;1994;1995;1996;1997;1998;1999;2000;2001;2002;2003;2004;2005;2006;2007;2008;2009;2010;2011;2012;2013;2014;2015;2016;2017;2018;2019;2020;2021;2022;2023\n1;VINHO DE MESA;VINHO DE MESA;98327606;114399031;118377367;116617910;94173324;108031792;139238614;140813114;141293379;149609112;122825298;128894580;166861772;195616620;171619507;185191837;203130018;131065191;150678647;172921267;164725646;190134895;180230431;201168480;180295366;146583828;165831436;174768638;181576649;200578746;221023603;221518224;227447392;217082959;225021830;271248493;245625614;226710045;200488612;234525979;221242945;230310468;206969571;221590810;206404427;209198468;166769622;176059959;177186273;180446489;215557931;210012238;187939996;187016848\n2;vm_Tinto;  Tinto;83300735;98522869;101167932;98196747;77167303;91528090;116407222;116609545;117203914;119496652;99646124;99151812;1319578

In [37]:
def process_csv_content(csv_content):
    # Ensure that csv_content is a string (unpack the content from tuple)
    if isinstance(csv_content, tuple):
        csv_content, error = csv_content
        if error:
            return {"error": error}  # Handle any error
    # Convert the CSV content string into StringIO for csv.reader
    csv_file = io.StringIO(csv_content)
    csv_reader = csv.reader(csv_file, delimiter=';')
    
    # Extract headers
    headers = next(csv_reader)
    
    # Parse and clean data
    data = []
    for row in csv_reader:
        formatted_row = OrderedDict()
        formatted_row['id'] = row[0]
        formatted_row['control'] = replace_special_chars(remove_special_chars(row[1]))
        formatted_row['product'] = replace_special_chars(remove_special_chars(row[2]))
        formatted_row['measure_type'] = 'liter'  # Hardcoded value as per initial requirements
        
        # Collect remaining columns as "yearly_data"
        yearly_data = OrderedDict()
        for year, value in zip(headers[3:], row[3:]):  # From the year columns onwards
            yearly_data[year] = value
        
        formatted_row['yearly_data'] = yearly_data
        data.append(formatted_row)
    
    # Return the data as a JSON string (since Flask's jsonify is unavailable here)
    return json.dumps(data, indent=4)

# Mock test (using the mocked csv_content string directly)
csv_content_tuple = csv_content  # Pass the string only, no tuple
json_result = process_csv_content(csv_content_tuple)
json_result


'[\n    {\n        "id": "1",\n        "control": "VINHO DE MESA",\n        "product": "VINHO DE MESA",\n        "measure_type": "liter",\n        "yearly_data": {\n            "1970": "98327606",\n            "1971": "114399031",\n            "1972": "118377367",\n            "1973": "116617910",\n            "1974": "94173324",\n            "1975": "108031792",\n            "1976": "139238614",\n            "1977": "140813114",\n            "1978": "141293379",\n            "1979": "149609112",\n            "1980": "122825298",\n            "1981": "128894580",\n            "1982": "166861772",\n            "1983": "195616620",\n            "1984": "171619507",\n            "1985": "185191837",\n            "1986": "203130018",\n            "1987": "131065191",\n            "1988": "150678647",\n            "1989": "172921267",\n            "1990": "164725646",\n            "1991": "190134895",\n            "1992": "180230431",\n            "1993": "201168480",\n            "1994": "