In [6]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import urllib.parse
import re

In [18]:
now = datetime.now()

# Checking pandas version
make sure it is 1.4.0 above

In [19]:
pd.__version__

'1.4.1'

## - Getting data from API
It might varies depend on your server, but basically is just to load the list of data into dataframe

In [20]:
r = requests.get("https://reqres.in/api/products")
j = r.json()["data"]

df = pd.DataFrame.from_dict(j)
df

Unnamed: 0,id,name,year,color,pantone_value
0,1,cerulean,2000,#98B2D1,15-4020
1,2,fuchsia rose,2001,#C74375,17-2031
2,3,true red,2002,#BF1932,19-1664
3,4,aqua sky,2003,#7BC4C4,14-4811
4,5,tigerlily,2004,#E2583E,17-1456
5,6,blue turquoise,2005,#53B0AE,15-5217


## - Generate dynamic list
Here i use the header https://aipharm.xyz/ as my header. I would like to do a pattern of https://aipharm.xyz/<products>/<id>/<product-name> 
    
Since the product is update monthly, I will put a priority of 0.6 and changefred of monthly

In [22]:
def returnURL(name,id,type):
    pattern = re.compile(r"[^\w\s]")
    url_name = pattern.sub("", name)
#     print(url_name)
    url_name = url_name.lower().replace(" ","-")
    url = "https://aipharm.xyz/"+type+"/"+str(id)+"/"+urllib.parse.quote(url_name)
    return url

df["loc"] = df.apply(lambda x: returnURL(x["name"],x["id"],"products"),axis=1)
df["lastmod"] = now.strftime("%Y-%m-%d")
df["changefreq"] = "monthly"
df["priority"] = 0.6

In [23]:
df = df.reindex(columns=["loc","lastmod","changefreq","priority"])
df

Unnamed: 0,loc,lastmod,changefreq,priority
0,https://aipharm.xyz/products/1/cerulean,2022-03-03,monthly,0.6
1,https://aipharm.xyz/products/2/fuchsia-rose,2022-03-03,monthly,0.6
2,https://aipharm.xyz/products/3/true-red,2022-03-03,monthly,0.6
3,https://aipharm.xyz/products/4/aqua-sky,2022-03-03,monthly,0.6
4,https://aipharm.xyz/products/5/tigerlily,2022-03-03,monthly,0.6
5,https://aipharm.xyz/products/6/blue-turquoise,2022-03-03,monthly,0.6


# - Generate static list
simply put any pages that you require here

In [25]:
df_main = pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[])
df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz",now.strftime("%Y-%m-%d"),"daily",1.0]]))

array_list = ["page1","page2","page3"]
for i in array_list:
    df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz/"+i,now.strftime("%Y-%m-%d"),"daily",1.0]]))

df_main

  df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz",now.strftime("%Y-%m-%d"),"daily",1.0]]))
  df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz/"+i,now.strftime("%Y-%m-%d"),"daily",1.0]]))
  df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz/"+i,now.strftime("%Y-%m-%d"),"daily",1.0]]))
  df_main = df_main.append(pd.DataFrame(columns=["loc","lastmod","changefreq","priority"], data=[["https://aipharm.xyz/"+i,now.strftime("%Y-%m-%d"),"daily",1.0]]))


Unnamed: 0,loc,lastmod,changefreq,priority
0,https://aipharm.xyz,2022-03-03,daily,1.0
0,https://aipharm.xyz/page1,2022-03-03,daily,1.0
0,https://aipharm.xyz/page2,2022-03-03,daily,1.0
0,https://aipharm.xyz/page3,2022-03-03,daily,1.0


# - Combine both list
combine both static and dynamic list into 1 dataframe, remember to drop index

In [26]:
df_final = df_main.append(df)
df_final = df_final.reset_index(drop=True)
df_final

  df_final = df_main.append(df)


Unnamed: 0,loc,lastmod,changefreq,priority
0,https://aipharm.xyz,2022-03-03,daily,1.0
1,https://aipharm.xyz/page1,2022-03-03,daily,1.0
2,https://aipharm.xyz/page2,2022-03-03,daily,1.0
3,https://aipharm.xyz/page3,2022-03-03,daily,1.0
4,https://aipharm.xyz/products/1/cerulean,2022-03-03,monthly,0.6
5,https://aipharm.xyz/products/2/fuchsia-rose,2022-03-03,monthly,0.6
6,https://aipharm.xyz/products/3/true-red,2022-03-03,monthly,0.6
7,https://aipharm.xyz/products/4/aqua-sky,2022-03-03,monthly,0.6
8,https://aipharm.xyz/products/5/tigerlily,2022-03-03,monthly,0.6
9,https://aipharm.xyz/products/6/blue-turquoise,2022-03-03,monthly,0.6


## - Export it out as XML sitemap
Thats how easy we can make a XML sitemap out from pandas

In [27]:
df_final.to_xml("sitemap.xml" ,
                index=False,
                root_name='urlset',
                row_name='url',
                namespaces= {"": "http://www.sitemaps.org/schemas/sitemap/0.9"})  

## BONUS: Github upload

In [None]:
from github import Github

# using an access token
g = Github("XXXXXXXX")
repo = g.get_repo("xxxx/medium_article")
with open('sitemap.xml', 'r') as file:
    content = file.read()
    
contents = repo.get_contents("public/sitemap.xml")
repo.update_file("public/sitemap.xml", "update sitemap", content, contents.sha, branch="main")