# CSV file format

## Reading data from CSV in Python

In [5]:
import seaborn as sns
import lxml
import openpyxl
import pandas as pd

In [7]:
filename = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0101EN-SkillsNetwork/labs/Module%205/data/addresses.csv"
import aiohttp
import asyncio

async def download(url, filename):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                with open(filename, "wb") as f:
                    f.write(await response.read())

# Replace 'your_url_here' with the actual URL from which you want to download the file.
await download(filename, 'addresses.csv')

import pandas as pd
df = pd.read_csv('addresses.csv', header=None)

In [9]:
# Adding column name to the DataFrame
df.columns =['First Name', 'Last Name', 'Location ', 'City','State','Area Code']

In [10]:
df.head()

Unnamed: 0,First Name,Last Name,Location,City,State,Area Code
0,John,Doe,120 jefferson st.,Riverside,NJ,8075
1,Jack,McGinnis,220 hobo Av.,Phila,PA,9119
2,"John ""Da Man""",Repici,120 Jefferson St.,Riverside,NJ,8075
3,Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD,91234
4,,Blankman,,SomeTown,SD,298


# JSON file Format
JSON is built on two structures:
* A collection of name/value pairs. 
* An ordered list of values. 

SON is a language-independent data format.

## Writing JSON to a File
Serializationc - the process of converting an object into a special format which is suitable for transmitting over the network or storing in file or database.

In [11]:
import json
person = {
    'first_name' : 'Mark',
    'last_name' : 'abc',
    'age' : 27,
    'address': {
        "streetAddress": "21 2nd Street",
        "city": "New York",
        "state": "NY",
        "postalCode": "10021-3100"
    }
}

## Serialization using dump() function
* json.dump() method can be used for writing to JSON file.                                 
Syntax: json.dump(dict, file_pointer)                 
Parameters:
* dictionary – name of the dictionary which should be converted to JSON object.
* file pointer – pointer of the file opened in write or append mode.

In [12]:
with open('person.json', 'w') as f:  # writing JSON object
    json.dump(person, f)

## Serialization using dumps() function
* json.dumps() that helps in converting a dictionary to a JSON object.                        
It takes two parameters:                            
* dictionary – name of the dictionary which should be converted to JSON object.
* indent – defines the number of units for indentation

In [13]:
# Serializing json  
json_object = json.dumps(person, indent = 4) 
  
# Writing to sample.json 
with open("sample.json", "w") as outfile: 
    outfile.write(json_object) 

In [14]:
print(json_object)

{
    "first_name": "Mark",
    "last_name": "abc",
    "age": 27,
    "address": {
        "streetAddress": "21 2nd Street",
        "city": "New York",
        "state": "NY",
        "postalCode": "10021-3100"
    }
}


## Reading JSON to a File
Using json.load()

In [15]:
import json 
  
# Opening JSON file 
with open('sample.json', 'r') as openfile: 
  
    # Reading from json file 
    json_object = json.load(openfile) 
  
print(json_object) 
print(type(json_object)) 

{'first_name': 'Mark', 'last_name': 'abc', 'age': 27, 'address': {'streetAddress': '21 2nd Street', 'city': 'New York', 'state': 'NY', 'postalCode': '10021-3100'}}
<class 'dict'>


# XLSX file format
* XLSX is a Microsoft Excel Open XML file format
* It is another type of Spreadsheet file format.

## Reading the data from XLSX file

In [19]:
filename = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0101EN-SkillsNetwork/labs/Module%205/data/file_example_XLSX_10.xlsx"
async def download(url, filename):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                with open(filename, "wb") as f:
                    f.write(await response.read())

# Replace 'your_url_here' with the actual URL from which you want to download the file.
await download(filename, 'file_sample.xlsx')

import pandas as pd
df = pd.read_excel('file_sample.xlsx', header=None)

# XML Format
Extensible Markup Language

## Writing with xml.etree.ElementTree
* ElementTree represents the XML document as a tree
* Move across the document using nodes which are elements and sub-elements of the XML file.

In [17]:
import xml.etree.ElementTree as ET

# create the file structure
employee = ET.Element('employee')
details = ET.SubElement(employee, 'details')
first = ET.SubElement(details, 'firstname')
second = ET.SubElement(details, 'lastname')
third = ET.SubElement(details, 'age')
first.text = 'Shiv'
second.text = 'Mishra'
third.text = '23'

# create a new XML file with the results
mydata1 = ET.ElementTree(employee)
# myfile = open("items2.xml", "wb")
# myfile.write(mydata)
with open("new_sample.xml", "wb") as files:
    mydata1.write(files)

## Reading with xml.etree.ElementTree

In [20]:
import xml.etree.ElementTree as etree

filename = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0101EN-SkillsNetwork/labs/Module%205/data/Sample-employee-XML-file.xml"

async def download(url, filename):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                with open(filename, "wb") as f:
                    f.write(await response.read())

# Replace 'your_url_here' with the actual URL from which you want to download the file.
await download(filename, 'sample_employee.xml')


In [22]:
tree = etree.parse("Sample_employee.xml")

root = tree.getroot()
columns = ["firstname", "lastname", "title", "division", "building","room"]

datatframe = pd.DataFrame(columns = columns)

for node in root: 

    firstname = node.find("firstname").text

    lastname = node.find("lastname").text 

    title = node.find("title").text 
    
    division = node.find("division").text 
    
    building = node.find("building").text
    
    room = node.find("room").text
    
    datatframe = pd.concat([datatframe, pd.Series([firstname, lastname, title, division, building, room], index = columns)], ignore_index = True)

In [24]:
datatframe.head()

Unnamed: 0,firstname,lastname,title,division,building,room,0
0,,,,,,,Shiv
1,,,,,,,Mishra
2,,,,,,,Engineer
3,,,,,,,Computer
4,,,,,,,301


# Binary File Format

In [25]:
# importing PIL 
from PIL import Image 

filename = "https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/dog-puppy-on-garden-royalty-free-image-1586966191.jpg"

async def download(url, filename):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                with open(filename, "wb") as f:
                    f.write(await response.read())

# Replace 'your_url_here' with the actual URL from which you want to download the file.
await download(filename, "./dog.jpg")

In [26]:
# Read image 
img = Image.open('./dog.jpg','r') 
  
# Output Images 
img.show()