# Test Technique : Bevouac

## Module 1 : Scrapping

- Téléchargement du fichier contenant le fichier full.csv

In [52]:
#imports 
import json
import tempfile
import numpy as np
import pandas as pd

# IMPORTING MODULES
import os
import zipfile
import tarfile
import gzip
import shutil
import requests

#extraction du fichier suivant l'url et téléchargement dans le fichier souhaité 
# ARCHIVE EXTENSIONS
ZIP_EXTENSION = ".zip"
TAR_EXTENSION = ".tar"
TAR_GZ_EXTENSION = ".tar.gz"
TGZ_EXTENSION = ".tgz"
GZ_EXTENSION = ".gz"
EMPTY_URL_ERROR = "ERROR: URL should not be empty."
FILENAME_ERROR = "ERROR: Filename should not be empty."
UNKNOWN_FORMAT = "ERROR: Unknown file format. Can't extract."
with tempfile.TemporaryDirectory() as tmpdirname:
        print('created temporary directory', tmpdirname)

#enregistrer le fichier souhaité dans le fichier concerné dans notre cas enregistré dans le fichier de téléchargement
def download_dataset(url, target_path="data/", keep_download=True, overwrite_download=False):
    """Downloads dataset from a url.
    url: string, a dataset path
    target_path: string, path where data will be downloaded
    keep_download: boolean, keeps the original file after extraction
    overwrite_download: boolean, stops download if dataset already exists
    """
    if url == "" or url is None:
        raise Exception(EMPTY_URL_ERROR)

    filename = get_filename(url)
    file_location = get_file_location(target_path, filename)

    os.makedirs(tmpdirname, exist_ok=True) #your downloading target path 

    if os.path.exists(file_location) and not overwrite_download:
        print(f"File already exists at {file_location}. Use: 'overwrite_download=True' to \
overwrite download")
        extract_file(target_path, filename)
        return

    print(f"Downloading file from {url} to {file_location}.")
    # Download
    with open(file_location, 'wb') as f:
        with requests.get(url, allow_redirects=True, stream=True) as resp:
            for chunk in resp.iter_content(chunk_size = 512):  #chunk_size in bytes
                if chunk:
                    f.write(chunk)

    print("Finished downloading.")
    print("Extracting the file now ...")
    extract_file(os.path.join(tmpdirname, '') , filename)

    if not keep_download:
        os.remove(file_location)

def extract_file(target_path, filename):
    """Extract file based on file extension
    target_path: string, location where data will be extracted
    filename: string, name of the file along with extension
    """
    if filename == "" or filename is None:
        raise Exception(FILENAME_ERROR)

    file_location = get_file_location(target_path, filename)

    if filename.endswith(ZIP_EXTENSION):
        print("Extracting zip file...")
        zipf = zipfile.ZipFile(file_location, 'r')
        zipf.extractall(target_path)
        zipf.close()
        print(' E N D')
    elif filename.endswith(TAR_EXTENSION) or \
         filename.endswith(TAR_GZ_EXTENSION) or \
         filename.endswith(TGZ_EXTENSION):
        print("Extracting tar file")
        tarf = tarfile.open(file_location, 'r')
        tarf.extractall(target_path)
        tarf.close()
        print(' E N D')
    elif filename.endswith(GZ_EXTENSION):
        print("Extracting gz file")
        out_file = file_location[:-3]
        with open(file_location, "rb") as f_in:
            with open(out_file, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
                print(' E N D')
    else:
        print(UNKNOWN_FORMAT)

def get_filename(url):
    """Extract filename from file url"""
    filename = os.path.basename(url)
    return filename

def get_file_location(target_path, filename):
    """ Concatenate download directory and filename"""
    return target_path + filename


created temporary directory C:\Users\Lenovo\AppData\Local\Temp\tmp57hd229t


In [53]:
#download information and locations of the file (temporary directory)

print('name of file: ',get_filename("https://cadastre.data.gouv.fr/data/etalab-dvf/latest/csv/2020/full.csv.gz"))
download_dataset("https://cadastre.data.gouv.fr/data/etalab-dvf/latest/csv/2020/full.csv.gz",os.path.join(tmpdirname, ''),keep_download=True, overwrite_download=False)

pass #pass the  ignoring error message: the python code does not recognize the path, because the file is temporary
#this normally is not recommended, but we use it to overcome the error messages and download the dataset
#properly

name of file:  full.csv.gz
Downloading file from https://cadastre.data.gouv.fr/data/etalab-dvf/latest/csv/2020/full.csv.gz to C:\Users\Lenovo\AppData\Local\Temp\tmp57hd229t\full.csv.gz.
Finished downloading.
Extracting the file now ...
Extracting gz file
 E N D


- Traitement Data

In [54]:
file_location = get_file_location(os.path.join(tmpdirname, ''), 'full.csv.gz')#obtaining file location

In [55]:
#pandas support zip file reads

#reading file
data_splited = pd.read_csv(file_location)


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [56]:
data_splited #voici le fichier avec ses caractéristiques
print(data_splited.shape, 'shape of data')

(827105, 40) shape of data


In [57]:
#del(data_splited['my_name'])#adding column
data_splited

Unnamed: 0,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,...,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude
0,2020-1,2020-01-07,1,Vente,8000.0,,,FORTUNAT,B063,1250.0,...,,,,T,terres,,,1061.0,5.323522,46.171899
1,2020-2,2020-01-07,1,Vente,75000.0,,,RUE DE LA CHARTREUSE,0064,1960.0,...,,,,AB,terrains a bâtir,,,610.0,5.226197,46.184538
2,2020-3,2020-01-14,1,Vente,89000.0,,,VACAGNOLE,B112,1340.0,...,,,,AB,terrains a bâtir,,,600.0,,
3,2020-3,2020-01-14,1,Vente,89000.0,,,VACAGNOLE,B112,1340.0,...,,,,AB,terrains a bâtir,,,491.0,,
4,2020-4,2020-01-22,1,Vente,209950.0,11.0,,RUE REYER,3340,1000.0,...,Maison,78.0,5.0,S,sols,,,242.0,5.219902,46.196484
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
827100,2020-377318,2020-06-30,1,Vente,520000.0,55.0,,RUE MONTMARTRE,6513,75002.0,...,Appartement,47.0,3.0,,,,,,2.344134,48.865842
827101,2020-377319,2020-06-26,1,Vente,1740000.0,104.0,,RUE REAUMUR,8073,75002.0,...,Appartement,150.0,5.0,,,,,,2.347903,48.867536
827102,2020-377320,2020-06-25,1,Vente,725000.0,16.0,,RUE CHARLEMAGNE,1797,75004.0,...,Appartement,48.0,3.0,,,,,,2.359936,48.854633
827103,2020-377321,2020-06-23,1,Vente,200000.0,7.0,,RUE CHABANAIS,1669,75002.0,...,Appartement,26.0,1.0,,,,,,2.336652,48.867463


In [58]:
data_splited['NOM_CANDIDAT'] = 'MAAREF'#adding column with my name 

In [59]:
data_splited = data_splited[['NOM_CANDIDAT','id_mutation', 'date_mutation', 'numero_disposition', 'nature_mutation',
       'valeur_fonciere', 'adresse_numero', 'adresse_suffixe',
       'adresse_nom_voie', 'adresse_code_voie', 'code_postal', 'code_commune',
       'nom_commune', 'code_departement', 'ancien_code_commune',
       'ancien_nom_commune', 'id_parcelle', 'ancien_id_parcelle',
       'numero_volume', 'lot1_numero', 'lot1_surface_carrez', 'lot2_numero',
       'lot2_surface_carrez', 'lot3_numero', 'lot3_surface_carrez',
       'lot4_numero', 'lot4_surface_carrez', 'lot5_numero',
       'lot5_surface_carrez', 'nombre_lots', 'code_type_local', 'type_local',
       'surface_reelle_bati', 'nombre_pieces_principales',
       'code_nature_culture', 'nature_culture', 'code_nature_culture_speciale',
       'nature_culture_speciale', 'surface_terrain', 'longitude', 'latitude',
       ]]#new column NOM_CANDIDAT in the first position

In [60]:
data_splited#new data frame with column NOM_CANDIDAT added

Unnamed: 0,NOM_CANDIDAT,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,...,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude
0,MAAREF,2020-1,2020-01-07,1,Vente,8000.0,,,FORTUNAT,B063,...,,,,T,terres,,,1061.0,5.323522,46.171899
1,MAAREF,2020-2,2020-01-07,1,Vente,75000.0,,,RUE DE LA CHARTREUSE,0064,...,,,,AB,terrains a bâtir,,,610.0,5.226197,46.184538
2,MAAREF,2020-3,2020-01-14,1,Vente,89000.0,,,VACAGNOLE,B112,...,,,,AB,terrains a bâtir,,,600.0,,
3,MAAREF,2020-3,2020-01-14,1,Vente,89000.0,,,VACAGNOLE,B112,...,,,,AB,terrains a bâtir,,,491.0,,
4,MAAREF,2020-4,2020-01-22,1,Vente,209950.0,11.0,,RUE REYER,3340,...,Maison,78.0,5.0,S,sols,,,242.0,5.219902,46.196484
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
827100,MAAREF,2020-377318,2020-06-30,1,Vente,520000.0,55.0,,RUE MONTMARTRE,6513,...,Appartement,47.0,3.0,,,,,,2.344134,48.865842
827101,MAAREF,2020-377319,2020-06-26,1,Vente,1740000.0,104.0,,RUE REAUMUR,8073,...,Appartement,150.0,5.0,,,,,,2.347903,48.867536
827102,MAAREF,2020-377320,2020-06-25,1,Vente,725000.0,16.0,,RUE CHARLEMAGNE,1797,...,Appartement,48.0,3.0,,,,,,2.359936,48.854633
827103,MAAREF,2020-377321,2020-06-23,1,Vente,200000.0,7.0,,RUE CHABANAIS,1669,...,Appartement,26.0,1.0,,,,,,2.336652,48.867463


In [61]:
adresse = pd.concat([data_splited["adresse_numero"], data_splited["adresse_nom_voie"], data_splited["nom_commune"],data_splited["code_postal"]], axis=1) 

In [62]:
adresse["pays"] = 'FRANCE' #adding the column 'pays'

In [63]:
adresse['code_postal'] = adresse['code_postal'].values.astype(int) #converting postal code to int

adresse#creating a dataframe adresse that we will use to merge the columns 

Unnamed: 0,adresse_numero,adresse_nom_voie,nom_commune,code_postal,pays
0,,FORTUNAT,Ceyzériat,1250,FRANCE
1,,RUE DE LA CHARTREUSE,Péronnas,1960,FRANCE
2,,VACAGNOLE,Attignat,1340,FRANCE
3,,VACAGNOLE,Attignat,1340,FRANCE
4,11.0,RUE REYER,Bourg-en-Bresse,1000,FRANCE
...,...,...,...,...,...
827100,55.0,RUE MONTMARTRE,Paris 2e Arrondissement,75002,FRANCE
827101,104.0,RUE REAUMUR,Paris 2e Arrondissement,75002,FRANCE
827102,16.0,RUE CHARLEMAGNE,Paris 4e Arrondissement,75004,FRANCE
827103,7.0,RUE CHABANAIS,Paris 2e Arrondissement,75002,FRANCE


In [64]:
data_splited['adresse_string'] = adresse[adresse.columns[0:]].apply(
    lambda x: ' '.join(x.dropna().astype(str)),
    axis=1) #merging different columns from the data frame adresse and creating the column 'adresse_string'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_splited['adresse_string'] = adresse[adresse.columns[0:]].apply(


In [65]:
data_splited['adresse_string'] #data tranformed with the right form
#NUMERO_RUE NOM_RUE NOM_VILLE CODE_POSTAL PAYS


0                            FORTUNAT Ceyzériat 1250 FRANCE
1                 RUE DE LA CHARTREUSE Péronnas 1960 FRANCE
2                            VACAGNOLE Attignat 1340 FRANCE
3                            VACAGNOLE Attignat 1340 FRANCE
4                11.0 RUE REYER Bourg-en-Bresse 1000 FRANCE
                                ...                        
827100    55.0 RUE MONTMARTRE Paris 2e Arrondissement 75...
827101    104.0 RUE REAUMUR Paris 2e Arrondissement 7500...
827102    16.0 RUE CHARLEMAGNE Paris 4e Arrondissement 7...
827103    7.0 RUE CHABANAIS Paris 2e Arrondissement 7500...
827104    3.0 RUE SAINT-JOSEPH Paris 2e Arrondissement 7...
Name: adresse_string, Length: 827105, dtype: object

In [135]:
#delete Nan lines from the dataframe in the columns 'longitudes ' and 'latitudes'

data_splited.dropna(subset=["longitude"], axis=0, inplace=True)
data_splited.dropna(subset=["latitude"], axis=0, inplace=True)

data_splited['valeur_fonciere'] = data_splited['valeur_fonciere'].values.astype(int)
data_splited['longitude'] = data_splited['longitude'].values.astype(float)
data_splited['latitude'] = data_splited['latitude'].values.astype(float)

In [136]:
data_splited #data with all existing longitude and latitude


Unnamed: 0,NOM_CANDIDAT,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,...,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,adresse_string
0,MAAREF,2020-117283,2020-01-01,1,Vente,2055,,,LA QUERLA,B147,...,,,L,landes,PATUR,Pâture plantée,10276.0,6.190460,45.098818,LA QUERLA Besse 38142 FRANCE
1,MAAREF,2020-160394,2020-01-01,1,Vente,100000,3.0,,RUE DU JEU DE BOULE,0080,...,75.0,3.0,S,sols,,,122.0,-0.558756,47.421083,3.0 RUE DU JEU DE BOULE Sainte-Gemmes-sur-Loir...
2,MAAREF,2020-29335,2020-01-01,1,Vente,195000,11.0,,RUE SAUVEUR TOBELEM,8654,...,,0.0,,,,,,5.365564,43.288497,11.0 RUE SAUVEUR TOBELEM Marseille 7e Arrondis...
3,MAAREF,2020-29335,2020-01-01,1,Vente,195000,11.0,,RUE SAUVEUR TOBELEM,8654,...,53.0,2.0,,,,,,5.365564,43.288497,11.0 RUE SAUVEUR TOBELEM Marseille 7e Arrondis...
4,MAAREF,2020-156084,2020-01-02,1,Vente,428000,28.0,,RUE PALISSY,2910,...,21.0,1.0,S,sols,,,103.0,0.614380,44.200272,28.0 RUE PALISSY Agen 47000 FRANCE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
780126,MAAREF,2020-361773,2020-06-30,1,Vente,33100000,66.0,,RUE DE PONTHIEU,7612,...,37.0,1.0,S,sols,,,767.0,2.305291,48.872334,66.0 RUE DE PONTHIEU Paris 8e Arrondissement 7...
780127,MAAREF,2020-8121,2020-06-30,1,Vente,35000,5136.0,,EYGUIANS GARE,B012,...,,0.0,T,terres,,,1175.0,5.769950,44.345989,5136.0 EYGUIANS GARE Garde-Colombe 5300 FRANCE
780128,MAAREF,2020-216822,2020-06-30,1,Vente,35000,62.0,,RUE GRASSIN BALEDANS,1900,...,18.0,1.0,,,,,,2.789443,50.287285,62.0 RUE GRASSIN BALEDANS Arras 62000 FRANCE
780129,MAAREF,2020-216823,2020-06-30,1,Vente,162000,,,LE VILLAGE EST,B030,...,,,J,jardins,,,550.0,3.006803,50.248522,LE VILLAGE EST Dury 62156 FRANCE


In [137]:
print(data_splited.shape, '--> new shape after dropping NaN values in columns longitude and latitude')

(780131, 42) --> new shape after dropping NaN values in columns longitude and latitude


In [138]:
data_splited.reset_index(drop=True, inplace=True)#reset index with new number of lines 
data_splited.to_csv(r'C:\Users\Lenovo\Desktop\Bevouac\data_splited.csv', index=False)

## Module 2 : Import de données

- Dans cette partie nous allons afficher les 500 dates les plus récentes 
- Nous allons par la suite charger le nouveau dataframe dans la base Airtable

In [139]:
#data_splited['date_mutation'] = pd.to_datetime(data_splited['date_mutation'])
data_splited = data_splited.sort_values('date_mutation')

data_splited.reset_index(drop=True, inplace=True)#reset index with new number of lines 
data_splited.head(500)#We use head to obtain the 500 recent dates 


Unnamed: 0,NOM_CANDIDAT,id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,...,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude,adresse_string
0,MAAREF,2020-117283,2020-01-01,1,Vente,2055,,,LA QUERLA,B147,...,,,L,landes,PATUR,Pâture plantée,10276.0,6.190460,45.098818,LA QUERLA Besse 38142 FRANCE
1,MAAREF,2020-160394,2020-01-01,1,Vente,100000,3.0,,RUE DU JEU DE BOULE,0080,...,75.0,3.0,S,sols,,,122.0,-0.558756,47.421083,3.0 RUE DU JEU DE BOULE Sainte-Gemmes-sur-Loir...
2,MAAREF,2020-29335,2020-01-01,1,Vente,195000,11.0,,RUE SAUVEUR TOBELEM,8654,...,,0.0,,,,,,5.365564,43.288497,11.0 RUE SAUVEUR TOBELEM Marseille 7e Arrondis...
3,MAAREF,2020-29335,2020-01-01,1,Vente,195000,11.0,,RUE SAUVEUR TOBELEM,8654,...,53.0,2.0,,,,,,5.365564,43.288497,11.0 RUE SAUVEUR TOBELEM Marseille 7e Arrondis...
4,MAAREF,2020-63182,2020-01-02,1,Vente,250000,17.0,,RUE DE KROAS PRENN,0450,...,120.0,3.0,S,sols,,,362.0,-3.984215,47.907844,17.0 RUE DE KROAS PRENN La Forêt-Fouesnant 299...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,MAAREF,2020-226573,2020-01-02,1,Vente,336770,25.0,,AV AUGUSTE WISSEL,0063,...,116.0,5.0,S,sols,,,479.0,4.847159,45.881893,25.0 AV AUGUSTE WISSEL Neuville-sur-Saône 6925...
496,MAAREF,2020-77582,2020-01-02,1,Vente,26980,,,FLORIMOND,B069,...,,,S,sols,,,10803.0,-0.563442,44.922721,FLORIMOND Blanquefort 33290 FRANCE
497,MAAREF,2020-226573,2020-01-02,1,Vente,336770,25.0,,AV AUGUSTE WISSEL,0063,...,116.0,5.0,L,landes,,,1725.0,4.847159,45.881893,25.0 AV AUGUSTE WISSEL Neuville-sur-Saône 6925...
498,MAAREF,2020-226570,2020-01-02,1,Vente,68900,15.0,,RUE ARTAUD,0490,...,20.0,1.0,,,,,,4.834711,45.781591,15.0 RUE ARTAUD Lyon 4e Arrondissement 69004 F...


In [191]:
"""API Module 2"""

import requests
import csv
import json



with tempfile.TemporaryDirectory() as tmpdirname2:
    data_splited.head(500).to_csv(os.path.join(tmpdirname2, '')+"Data_module2.csv")
        
    f = open(os.path.join(tmpdirname2, '')+"Data_module2.csv")
    print('make sure io.wrapper is convenient type', type(f))



    post_url = "https://api.airtable.com/v0/appqmv1skloVlLyKV/DVF"
    post_headers = {
        "Authorization" : "Bearer key9SsqMTpaKecsn0",
        "Content-Type": "application/json"
    }

        #f = open('C:\\Users\Lenovo\Desktop\Bevouac\Data_module2.csv')
    csv_f = csv.DictReader(f, delimiter=',')

    for row in csv_f:


        data = {

      "records": [
        {
          "fields": {
              "NOM_CANDIDAT": row['NOM_CANDIDAT'],
              "id_mutation": row['id_mutation'],
              "date_mutation": row['date_mutation'],
              "numero_disposition": int(row['numero_disposition']),
              "nature_mutation": row['nature_mutation'],
              "valeur_fonciere": int(row['valeur_fonciere']),
              "adresse_numero": row['adresse_numero'],
              "adresse_suffixe": row['adresse_suffixe'],
              "adresse_nom_voie": row['adresse_nom_voie'],
              "adresse_code_voie": row['adresse_code_voie'],
              "code_postal": row['code_postal'],
              "code_commune": row['code_commune'],
              "nom_commune": row['nom_commune'],
              "code_departement": row['code_departement'],
              "ancien_code_commune": row['ancien_code_commune'],
              "ancien_nom_commune": row['ancien_nom_commune'],
              "id_parcelle": row['id_parcelle'],
              "ancien_id_parcelle": row['ancien_id_parcelle'],
              "numero_volume": row['numero_volume'],
              "lot1_numero": row['lot1_numero'],
              "lot1_surface_carrez": row['lot1_surface_carrez'],
              "lot2_numero": row['lot2_numero'],
              "lot2_surface_carrez": row['lot2_surface_carrez'],
              "lot3_numero": row['lot3_numero'],
              "lot3_surface_carrez": row['lot3_surface_carrez'],
              "lot4_numero": row['lot4_numero'],
              "lot4_surface_carrez": row['lot4_surface_carrez'],
              "lot5_numero": row['lot5_numero'],
              "lot5_surface_carrez": row['lot5_surface_carrez'],
              "nombre_lots": row['nombre_lots'],
              "code_type_local": row['code_type_local'],
              "type_local": row['type_local'],
              "surface_reelle_bati": row['surface_reelle_bati'],
              "nombre_pieces_principales": row['nombre_pieces_principales'],
              "code_nature_culture": row['code_nature_culture'],
              "nature_culture": row['nature_culture'],
              "code_nature_culture_speciale": row['code_nature_culture_speciale'],
              "nature_culture_speciale": row['nature_culture_speciale'],
              "surface_terrain": row['surface_terrain'],
              "longitude": float(row['longitude']),
              "latitude": float(row['latitude']),
              "adresse_string": row['adresse_string']

                }

            },
      ],





    }



        print(post_url)
        print(data)

        post_airtable_request = requests.post(post_url, headers = post_headers, json = data)
        print(post_airtable_request.status_code)


make sure io.wrapper is convenient type <class '_io.TextIOWrapper'>
https://api.airtable.com/v0/appqmv1skloVlLyKV/DVF
{'records': [{'fields': {'NOM_CANDIDAT': 'MAAREF', 'id_mutation': '2020-117283', 'date_mutation': '2020-01-01', 'numero_disposition': 1, 'nature_mutation': 'Vente', 'valeur_fonciere': 2055, 'adresse_numero': '', 'adresse_suffixe': '', 'adresse_nom_voie': 'LA QUERLA', 'adresse_code_voie': 'B147', 'code_postal': '38142.0', 'code_commune': '38040', 'nom_commune': 'Besse', 'code_departement': '38', 'ancien_code_commune': '', 'ancien_nom_commune': '', 'id_parcelle': '380400000C1025', 'ancien_id_parcelle': '', 'numero_volume': '', 'lot1_numero': '', 'lot1_surface_carrez': '', 'lot2_numero': '', 'lot2_surface_carrez': '', 'lot3_numero': '', 'lot3_surface_carrez': '', 'lot4_numero': '', 'lot4_surface_carrez': '', 'lot5_numero': '', 'lot5_surface_carrez': '', 'nombre_lots': '0', 'code_type_local': '', 'type_local': '', 'surface_reelle_bati': '', 'nombre_pieces_principales': '', 

NotADirectoryError: [WinError 267] Nom de répertoire non valide: 'C:\\Users\\Lenovo\\AppData\\Local\\Temp\\tmp3hy1twrm\\Data_module2.csv'

int

## Module 3 : Flux de données 

- Dans cette partie, à chaque exécution le programme doit importer uniquement les données "inédites" (qui ne sont pas présentes dans la base Airtable )


In [189]:

        
#code of module 3
#print 500 new lines for example

with open("Data_module2.csv", "r") as readfile:
    lines = readfile.readlines()
    
    list_num = len((lines))

with tempfile.TemporaryDirectory() as tmpdirname3:
        new_data = data_splited.loc[list_num:list_num+500,:]#concaténation des lignes inédites + 500 nouvelles lignes
        new_data.to_csv(os.path.join(tmpdirname3, '')+"new_data.csv")
        f2 = open(os.path.join(tmpdirname3, '')+"new_data.csv")
        print('make sure io.wrapper is convenient type', type(f2))
        


        post_url = "https://api.airtable.com/v0/appqmv1skloVlLyKV/DVF"
        post_headers = {
            "Authorization" : "Bearer key9SsqMTpaKecsn0",
            "Content-Type": "application/json"
        }

        #f = open('C:\\Users\Lenovo\Desktop\Bevouac\Data_module2.csv')
        csv_f2 = csv.DictReader(f2, delimiter=',')

        for row in csv_f2:


            data = {

          "records": [
            {
              "fields": {
                  "NOM_CANDIDAT": row['NOM_CANDIDAT'],
                  "id_mutation": row['id_mutation'],
                  "date_mutation": row['date_mutation'],
                  "numero_disposition": int(row['numero_disposition']),
                  "nature_mutation": row['nature_mutation'],
                  "valeur_fonciere": int(row['valeur_fonciere']),
                  "adresse_numero": row['adresse_numero'],
                  "adresse_suffixe": row['adresse_suffixe'],
                  "adresse_nom_voie": row['adresse_nom_voie'],
                  "adresse_code_voie": row['adresse_code_voie'],
                  "code_postal": row['code_postal'],
                  "code_commune": row['code_commune'],
                  "nom_commune": row['nom_commune'],
                  "code_departement": row['code_departement'],
                  "ancien_code_commune": row['ancien_code_commune'],
                  "ancien_nom_commune": row['ancien_nom_commune'],
                  "id_parcelle": row['id_parcelle'],
                  "ancien_id_parcelle": row['ancien_id_parcelle'],
                  "numero_volume": row['numero_volume'],
                  "lot1_numero": row['lot1_numero'],
                  "lot1_surface_carrez": row['lot1_surface_carrez'],
                  "lot2_numero": row['lot2_numero'],
                  "lot2_surface_carrez": row['lot2_surface_carrez'],
                  "lot3_numero": row['lot3_numero'],
                  "lot3_surface_carrez": row['lot3_surface_carrez'],
                  "lot4_numero": row['lot4_numero'],
                  "lot4_surface_carrez": row['lot4_surface_carrez'],
                  "lot5_numero": row['lot5_numero'],
                  "lot5_surface_carrez": row['lot5_surface_carrez'],
                  "nombre_lots": row['nombre_lots'],
                  "code_type_local": row['code_type_local'],
                  "type_local": row['type_local'],
                  "surface_reelle_bati": row['surface_reelle_bati'],
                  "nombre_pieces_principales": row['nombre_pieces_principales'],
                  "code_nature_culture": row['code_nature_culture'],
                  "nature_culture": row['nature_culture'],
                  "code_nature_culture_speciale": row['code_nature_culture_speciale'],
                  "nature_culture_speciale": row['nature_culture_speciale'],
                  "surface_terrain": row['surface_terrain'],
                  "longitude": float(row['longitude']),
                  "latitude": float(row['latitude']),
                  "adresse_string": row['adresse_string']

                    }

                },
          ],





        }



            print(post_url)
            print(data)

            post_airtable_request = requests.post(post_url, headers = post_headers, json = data)
            print(post_airtable_request.status_code)

make sure io.wrapper is convenient type <class '_io.TextIOWrapper'>
https://api.airtable.com/v0/appqmv1skloVlLyKV/DVF
{'records': [{'fields': {'NOM_CANDIDAT': 'MAAREF', 'id_mutation': '2020-77613', 'date_mutation': '2020-01-02', 'numero_disposition': 1, 'nature_mutation': 'Vente', 'valeur_fonciere': 442100, 'adresse_numero': '', 'adresse_suffixe': '', 'adresse_nom_voie': 'RUE MARCEL BENSAC', 'adresse_code_voie': '0460', 'code_postal': '33290.0', 'code_commune': '33312', 'nom_commune': 'Parempuyre', 'code_departement': '33', 'ancien_code_commune': '', 'ancien_nom_commune': '', 'id_parcelle': '33312000BD0316', 'ancien_id_parcelle': '', 'numero_volume': '', 'lot1_numero': '', 'lot1_surface_carrez': '', 'lot2_numero': '', 'lot2_surface_carrez': '', 'lot3_numero': '', 'lot3_surface_carrez': '', 'lot4_numero': '', 'lot4_surface_carrez': '', 'lot5_numero': '', 'lot5_surface_carrez': '', 'nombre_lots': '0', 'code_type_local': '', 'type_local': '', 'surface_reelle_bati': '', 'nombre_pieces_prin

NotADirectoryError: [WinError 267] Nom de répertoire non valide: 'C:\\Users\\Lenovo\\AppData\\Local\\Temp\\tmp_c478vfe\\new_data.csv'