# Download pictures of Buddha statues in different art styles

## Different artistic currents

- Dvaravati ทวารวดี
- Srivijaya ศรีวิชัย
- Lopburi ลพบุรี
- Sukhothai สุโขทัย
- Lan Na / Chiang Saen ล้านนา เชียงแสน
- U Thong พระอู่ทอง
- Ayutthaya อยุธยา
- Rattanakosin ศิลปะรัตนโกสินทร์

In [1]:
# Period names used as the `cat` query value when searching the museum site.
# NOTE(review): some spellings ("Ayutthay", "Kindom") look like typos, but they
# are sent to the site verbatim -- confirm against the site before changing them.
style = [
    "Ayutthay Period",
    "Dvaravati Period",
    "Lopburi Period",
    "Rattanakosin Period",
    "Sukhothai Period",
    "Uthong Art",
    "Chiang Saen Kindom",
    "Lanna Kingdom",
    "Srivichai Kingdom",
]

## Source

In [2]:
# Root URL of the museum site; it is passed as the root path when downloading,
# where each statue's relative image path is appended to it.
website = "http://research.crma.ac.th/museum/"

## Scraper definition

In [3]:
# Dependencies
import csv
import time
import urllib
import urllib.request

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

In [7]:
class Statue:
    """A single museum object: its caption, image path and art style.

    Attributes:
        caption: Caption text of the object.
        Img: Image path; it is appended to the root path when downloading.
        style: Art-style label; also used as the file-name prefix of the
            downloaded picture.
    """

    # Running number used in downloaded file names. It is shared by all
    # instances and advanced after every download attempt, failed or not.
    dwdStatue = 1

    def __init__(self, caption, imgpath, style):
        """Store the caption, the image path and the style label."""
        self.caption = caption
        self.Img = imgpath
        self.style = style

    def download(self, destPath, rootPath):
        """Download the picture.

        The picture is fetched from ``rootPath + self.Img`` and written to
        ``destPath + '<style>_<n>.jpg'``, where ``n`` is the current value of
        ``Statue.dwdStatue``.

        Args:
            destPath: Destination prefix (a folder path ending with a
                separator); the file name is appended to it directly.
            rootPath: URL prefix the image path is appended to.

        Returns:
            The ``(filename, headers)`` tuple returned by
            ``urllib.request.urlretrieve``, or ``None`` if the download failed.
        """
        target = destPath + self.style + '_{}.jpg'.format(Statue.dwdStatue)
        result = None
        try:
            result = urllib.request.urlretrieve(rootPath + self.Img, target)
        except Exception as err:
            # Best effort: one broken picture must not stop a whole batch.
            # Catching Exception (instead of a bare except) lets Ctrl-C and
            # SystemExit still interrupt a long download run.
            print('error in with {}: {}'.format(self.Img, err))
        Statue.dwdStatue += 1
        return result

    def __repr__(self):
        """Return the statue's fields rendered as a dict string."""
        return str({'caption': self.caption, 'image': self.Img, 'style': self.style})

In [8]:
class Museum:
    """Container of Statue objects, filled from web pages or from a CSV file.

    Attributes:
        content: List of the Statue objects collected so far.
        style: Style label given together with a URL at construction,
            otherwise ``None``.
    """

    # Column names written as the first line of the CSV file.
    _CSV_HEADER = ('caption', 'Image', 'style')

    def __init__(self, url=None, style=None):
        """Create an empty museum, or one pre-filled from ``url``.

        Args:
            url: Optional page to scrape immediately.
            style: Style label attached to every object found at ``url``.
        """
        self.content = []
        self.style = None
        if url is not None:
            self.style = style
            self.content = self.importFromUrl(url, style)

    def importFromUrl(self, url, style):
        """Load a web page and return the list of Statue objects found in it.

        Every ``<figure class="text-center">`` element yields one Statue whose
        caption is the upper-cased ``<figcaption>`` text and whose image path
        is the ``src`` of its ``<img>``. Figures without an image source are
        skipped; a missing caption becomes an empty string.

        Args:
            url: Address of the page to scrape.
            style: Style label given to every Statue created.

        Returns:
            A list of Statue objects (possibly empty).
        """
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            page = BeautifulSoup(response.read(), 'lxml')
        content = []
        for figure in page.find_all('figure', {'class': "text-center"}):
            img = figure.find("img")
            src = img.get('src') if img is not None else None
            if not src:
                # Nothing could be downloaded for this figure.
                continue
            figcaption = figure.find("figcaption")
            caption = figcaption.text.upper() if figcaption is not None else ''
            content.append(Statue(caption, src, style))
        return content

    def addNewSource(self, url, style):
        """Scrape ``url`` and append the objects found to the content list."""
        self.content.extend(self.importFromUrl(url, style))

    def download(self, destPath, rootPath):
        """Download the pictures of all the objects in the content list.

        Args:
            destPath: Destination prefix passed to each ``Statue.download``.
            rootPath: URL prefix passed to each ``Statue.download``.
        """
        for statue in self.content:
            statue.download(destPath, rootPath)
            # Pause 1 or 2 seconds between requests (upper bound is exclusive).
            time.sleep(np.random.randint(1, 3))

    def saveMuseum(self, pathFileName):
        """Save the museum as a ';'-separated CSV file.

        The first line holds the column names; each following line holds one
        object as ``caption;Image;style``. ``%20`` sequences in the caption
        are written as spaces.

        Args:
            pathFileName: Path of the file to write (overwritten if present).
        """
        # utf-8 keeps the result independent of the platform's default
        # encoding; newline='' hands line-ending control to the csv module.
        with open(pathFileName, 'w', encoding='utf-8', newline='') as handle:
            writer = csv.writer(handle, delimiter=';', lineterminator='\n')
            # The header gets its own line so the first record is not glued
            # to it.
            writer.writerow(self._CSV_HEADER)
            for statue in self.content:
                writer.writerow([
                    statue.caption.replace('%20', " "),
                    statue.Img,
                    statue.style,
                ])

    def loadFromCSV(self, file):
        """Append the objects stored in a CSV file to the content list.

        Reads files written by ``saveMuseum``. Files from the earlier format
        (header glued to the first record, trailing ';' on every line) are
        also accepted. Blank or incomplete lines are skipped.

        Args:
            file: Path of the CSV file to read.
        """
        with open(file, 'r', encoding='utf-8', newline='') as handle:
            for row in csv.reader(handle, delimiter=';'):
                if tuple(row[:3]) == self._CSV_HEADER:
                    continue
                if (len(row) >= 5 and row[:2] == ['caption', 'Image']
                        and row[2].startswith('style')):
                    # Earlier format: the header had no line break, so the
                    # first record follows the word 'style' on the same line.
                    row = [row[2][len('style'):]] + row[3:]
                if len(row) < 3:
                    continue
                self.content.append(Statue(row[0], row[1], row[2]))

    def toDataFrame(self):
        """Return the content as a DataFrame with Caption, Img, Style columns."""
        return pd.DataFrame({
            'Caption': [statue.caption for statue in self.content],
            'Img': [statue.Img for statue in self.content],
            'Style': [statue.style for statue in self.content],
        })

## Data downloading

In [9]:
# Start from an empty museum; the pages of each style are added to it below.
data = Museum()

In [10]:
# Loop through the different styles to collect the museum information
search_url = "http://research.crma.ac.th/museum/index.php?lang=en&promiddle=searchPeriod.php&cat={}"
for period in style:
    # Spaces are percent-encoded for the query string; the encoded form is
    # also what gets stored as each statue's style label.
    encoded_period = period.replace(' ', '%20')
    data.addNewSource(search_url.format(encoded_period), encoded_period)
    # Wait 10 to 19 seconds before requesting the next page.
    pause = np.random.randint(10, 20)
    time.sleep(pause)
# Save the museum
data.saveMuseum("D:/Project/DeepLearning/buddhaStyle/Data/Museum.csv")

In [11]:
# Reload the saved catalogue into a fresh Museum: loadFromCSV appends to the
# existing content, so loading into an already-populated `data` would list
# (and later download) every object twice.
data = Museum()
data.loadFromCSV("D:/Project/DeepLearning/buddhaStyle/Data/Museum.csv")
data.toDataFrame()

Unnamed: 0,Caption,Img,Style
0,STANDING BUDDHA ON VANASPATI,img/tphranakorn/bkk001.jpg,Dvaravati%20Period
1,BUDDHA SHELTERED BY NAGA'S HOOD,img/tphranakorn/bkk002.jpg,Dvaravati%20Period
2,TEMPLE BOUNDARY STONE (SIMA) WITH BUDDHA DESCE...,img/tphranakorn/bkk003.jpg,Dvaravati%20Period
3,HEAD OF BUDHISATTVA,img/tphranakorn/bkk004.jpg,Dvaravati%20Period
4,RELIEF DEPICTING NOBLE WOMAN AND LADY-IN-WAITING,img/tphranakorn/bkk005.jpg,Dvaravati%20Period
5,RELIEF DEPICTING FEMALE MUSICIANS,img/tphranakorn/bkk006.jpg,Dvaravati%20Period
6,BODHISATTVA AVALOKITESHVARA,img/tphranakorn/bkk007.jpg,Dvaravati%20Period
7,CHADDANTA JATAKA (A PREVIOUS LIFE OF THE BUDDHA),img/tphranakorn/bkk008.jpg,Dvaravati%20Period
8,WHEEL OF THE LAW AND A CROUCHING DEER,img/tphranakorn/bkk074.jpg,Dvaravati%20Period
9,STANDING BUDDHA IN THE GESTURE OF BLESSING,img/tphranakorn/bkk078.jpg,Dvaravati%20Period


In [12]:
# Download the museum: fetch the picture of every object in `data` into the
# local "original" folder, using `website` as the root of the image paths.
data.download("D:/Project/DeepLearning/buddhaStyle/Data/original/",website)