### Reading and writing XML and JSON files

In [1]:
### libraries 
library('XML')
library('methods')
library('tidyverse')
library('httr')
library('jsonlite')

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.4.1 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten




#### XML

In [2]:
# Parse the local CD catalogue XML file into a data frame.
data <- xmlToDataFrame("data/data_10/root_data/cd.xml")

In [3]:
### Display the data frame parsed from cd.xml
data

TITLE,ARTIST,COUNTRY,COMPANY,PRICE,YEAR
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Empire Burlesque,Bob Dylan,USA,Columbia,10.9,1985
Greatest Hits,Dolly Parton,USA,RCA,9.9,1982
When a man loves a woman,Percy Sledge,USA,Atlantic,8.7,1987
1999 Grammy Nominees,Many,USA,Grammy,10.2,1999
Big Willie style,Will Smith,USA,Columbia,9.9,1997
The dock of the bay,Otis Redding,USA,Stax Records,7.9,1968
Unchain my heart,Joe Cocker,USA,EMI,8.2,1987


In [4]:
# Keep only the rows whose COUNTRY column equals "USA", then display them.
CDs_USA <- data[data[["COUNTRY"]] == "USA", ]
CDs_USA

Unnamed: 0_level_0,TITLE,ARTIST,COUNTRY,COMPANY,PRICE,YEAR
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Empire Burlesque,Bob Dylan,USA,Columbia,10.9,1985
2,Greatest Hits,Dolly Parton,USA,RCA,9.9,1982
3,When a man loves a woman,Percy Sledge,USA,Atlantic,8.7,1987
4,1999 Grammy Nominees,Many,USA,Grammy,10.2,1999
5,Big Willie style,Will Smith,USA,Columbia,9.9,1997
6,The dock of the bay,Otis Redding,USA,Stax Records,7.9,1968
7,Unchain my heart,Joe Cocker,USA,EMI,8.2,1987


In [5]:
# Build an XML document from the CDs_USA data frame: a <CATALOG> root node
# with one <CD> child per row, each holding one child element per field.
doc <- newXMLDoc()
top <- newXMLNode("CATALOG", doc = doc)

# Columns copied into every <CD> node, in output order.
cd_fields <- c("TITLE", "ARTIST", "COUNTRY", "COMPANY", "PRICE", "YEAR")

# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:nrow() would iterate over c(1, 0) and emit bogus <CD> nodes.
for (row in seq_len(nrow(CDs_USA))) {
  cdnode <- newXMLNode("CD", parent = top)
  for (field in cd_fields) {
    # The element name is the column name; its text is the cell value.
    newXMLNode(field, CDs_USA[row, field], parent = cdnode)
  }
}


In [6]:
# Serialise the document with an explicit XML declaration as prefix and
# write the resulting text to cd_usa.xml.
xml_declaration <- '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n'
xml_text <- saveXML(doc, indent = TRUE, prefix = xml_declaration)
cat(xml_text, file = "data/data_10/root_data/cd_usa.xml")

In [7]:
# Load cd_usa.xml back into a data frame and preview its first rows.
data <- xmlToDataFrame("data/data_10/root_data/cd_usa.xml")
head(data, n = 6L)

Unnamed: 0_level_0,TITLE,ARTIST,COUNTRY,COMPANY,PRICE,YEAR
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Empire Burlesque,Bob Dylan,USA,Columbia,10.9,1985
2,Greatest Hits,Dolly Parton,USA,RCA,9.9,1982
3,When a man loves a woman,Percy Sledge,USA,Atlantic,8.7,1987
4,1999 Grammy Nominees,Many,USA,Grammy,10.2,1999
5,Big Willie style,Will Smith,USA,Columbia,9.9,1997
6,The dock of the bay,Otis Redding,USA,Stax Records,7.9,1968


In [8]:
# Download the XML catalogue from a URL into a local file.
# Note from the original author: use http here, not https.
url <- "http://www.w3schools.com/xml/cd_catalog.xml"
download.file(url = url, destfile = "c33.xml")

In [9]:
# Parse the downloaded catalogue file and preview its first rows.
cd <- xmlToDataFrame("c33.xml")
head(cd, n = 6L)

Unnamed: 0_level_0,TITLE,ARTIST,COUNTRY,COMPANY,PRICE,YEAR
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Empire Burlesque,Bob Dylan,USA,Columbia,10.9,1985
2,Hide your heart,Bonnie Tyler,UK,CBS Records,9.9,1988
3,Greatest Hits,Dolly Parton,USA,RCA,9.9,1982
4,Still got the blues,Gary Moore,UK,Virgin records,10.2,1990
5,Eros,Eros Ramazzotti,EU,BMG,9.9,1997
6,One night only,Bee Gees,UK,Polydor,10.9,1998


### JSON

In [10]:
# Parse the local books JSON file into an R object.
json_text <- fromJSON("data/data_10/root_data/books.json")

In [11]:
### Display the object parsed from books.json
json_text

Unnamed: 0_level_0,title,publisher,isbn,pages,author,attribute
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<list>,<list>
1,Applied Linear Statistical Models,McGraw Hill,9780073108742,1396,"Michael Kutner , William Li , Christopher Nachtsheim, John Neter","Exercises , Illustrations, Readability"
2,Mathematical Proofs: A Transition to Advanced Mathematics,Pearson,9780321390530,365,"Gary Chartrand , Ping Zhang , Albert Polimeni","Exercises , Readability"
3,Mathematical Statistics with Resampling and R,Wiley,9781118029855,418,"Laura Chihara , Tim Hesterberg","Exercises , Illustrations, Readability"


In [12]:
### Pull the `book` entry nested under `Mathematics` and coerce it to a data frame
books <- as.data.frame(json_text$Mathematics$book)

In [13]:
### Display the books data frame
books

Unnamed: 0_level_0,title,publisher,isbn,pages,author,attribute
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<list>,<list>
1,Applied Linear Statistical Models,McGraw Hill,9780073108742,1396,"Michael Kutner , William Li , Christopher Nachtsheim, John Neter","Exercises , Illustrations, Readability"
2,Mathematical Proofs: A Transition to Advanced Mathematics,Pearson,9780321390530,365,"Gary Chartrand , Ping Zhang , Albert Polimeni","Exercises , Readability"
3,Mathematical Statistics with Resampling and R,Wiley,9781118029855,418,"Laura Chihara , Tim Hesterberg","Exercises , Illustrations, Readability"


In [None]:
# Serialise the books data frame to JSON text and save it to disk.
json_books <- toJSON(books)
# The target must be a file: data/data_10 itself is the directory holding
# root_data/, so it cannot be opened for writing. A separate file name also
# keeps the input books.json intact.
write(json_books, file = "data/data_10/root_data/books_out.json")

### get JSON from internet

In [None]:
# Fetch JSON from the product_service endpoint, convert it to a data frame
# and print its leading columns.
url <- "http://phuong13021982.pythonanywhere.com/mystore/product_service/"
get_url <- GET(url)
# Fail with the HTTP status rather than handing an error page to fromJSON().
stop_for_status(get_url)
# Decode the body explicitly as UTF-8 text; rawToChar() would leave the
# encoding unmarked, which can garble non-ASCII characters.
content <- httr::content(get_url, as = "text", encoding = "UTF-8")
json <- fromJSON(content)
tivis <- data.frame(json)
# Show the first three columns, or fewer if the response has fewer.
print(tivis[seq_len(min(3L, ncol(tivis)))])