In [1]:
import requests
import numpy as np
import pandas as pd
from pathlib import Path
import os

import json
from SPARQLWrapper import SPARQLWrapper, JSON
from pandas import DataFrame

pd.options.display.max_rows = 250
pd.options.display.max_columns = 50


In [2]:
wds = "https://query.wikidata.org/sparql" 

sparql = SPARQLWrapper("https://query.wikidata.org/sparql", 
            agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")

def get_sparql_dataframe(service, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    sparql = SPARQLWrapper(service,  agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)

    return pd.DataFrame(out, columns=cols)





rq = """
SELECT DISTINCT ?personLabel ?spouse1Label ?place_ownLabel ?place_spLabel
?marriage_date_st ?marriage_date_en ?bday_own ?bday_sp ?dday_own ?dday_sp ?genderLabel WHERE {

?person wdt:P31 wd:Q5.
?person wdt:P27 wd:Q668.
?spouse1 wdt:P27 wd:Q668.
?person wdt:P21 ?gender.

?person p:P26 ?spouse. 
?spouse ps:P26 ?spouse1.

?person wdt:P569 ?bday_own. 
?spouse1 wdt:P569 ?bday_sp.

OPTIONAL {?person wdt:P570 ?dday_own. }
OPTIONAL {?spouse wdt:P570 ?dday_sp. }

OPTIONAL {?person wdt:P19 ?place_own. }
OPTIONAL {?spouse1 wdt:P19 ?place_sp. }

?spouse pq:P580 ?marriage_date_st.
OPTIONAL {?spouse pq:P582 ?marriage_date_en. }

#FILTER year marriage date st)>2060)

SERVICE wikibase:label  {bd:serviceParam wikibase:language "en" . }
}
"""

In [3]:
df = get_sparql_dataframe(wds, rq)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 621 entries, 0 to 620
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   personLabel       621 non-null    object
 1   spouse1Label      621 non-null    object
 2   place_ownLabel    577 non-null    object
 3   place_spLabel     575 non-null    object
 4   marriage_date_st  621 non-null    object
 5   marriage_date_en  282 non-null    object
 6   bday_own          621 non-null    object
 7   bday_sp           621 non-null    object
 8   dday_own          129 non-null    object
 9   dday_sp           0 non-null      object
 10  genderLabel       621 non-null    object
dtypes: object(11)
memory usage: 53.5+ KB


In [5]:
df.head()

Unnamed: 0,personLabel,spouse1Label,place_ownLabel,place_spLabel,marriage_date_st,marriage_date_en,bday_own,bday_sp,dday_own,dday_sp,genderLabel
0,Jyothirmayi,Amal Neerad,Kottayam,Kollam,2015-01-01T00:00:00Z,,1983-04-05T00:00:00Z,1976-10-07T00:00:00Z,,,female
1,Ramya Krishnan,Krishna Vamsi,Chennai,Tadepalligudem,2003-01-01T00:00:00Z,,1967-09-15T00:00:00Z,1962-07-28T00:00:00Z,,,female
2,Gauri Khan,Shah Rukh Khan,New Delhi,New Delhi,1991-01-01T00:00:00Z,,1970-10-08T00:00:00Z,1965-11-02T00:00:00Z,,,female
3,Anuradha Ghandy,Kobad Ghandy,Mumbai,Mumbai,1983-01-01T00:00:00Z,2008-01-01T00:00:00Z,1954-03-28T00:00:00Z,1951-01-01T00:00:00Z,2008-04-12T00:00:00Z,,female
4,Amala Paul,A. L. Vijay,Kochi,Chennai,2014-01-01T00:00:00Z,2016-01-01T00:00:00Z,1991-10-26T00:00:00Z,1983-06-17T00:00:00Z,,,female


In [6]:
#Remove duplicates in pairing
m = ~pd.DataFrame(np.sort(df[['personLabel','spouse1Label']], axis=1)).duplicated()
df = df[m]

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 284 entries, 0 to 607
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   personLabel       284 non-null    object
 1   spouse1Label      284 non-null    object
 2   place_ownLabel    261 non-null    object
 3   place_spLabel     263 non-null    object
 4   marriage_date_st  284 non-null    object
 5   marriage_date_en  121 non-null    object
 6   bday_own          284 non-null    object
 7   bday_sp           284 non-null    object
 8   dday_own          56 non-null     object
 9   dday_sp           0 non-null      object
 10  genderLabel       284 non-null    object
dtypes: object(11)
memory usage: 26.6+ KB


In [8]:
df_male = df[df['genderLabel'] =='male']

df_male['bday_own']= pd.to_datetime(df_male['bday_own'], errors = 'coerce')
df_male['m_day'] = df_male['bday_own'].dt.day.astype(int)
df_male['m_month'] = df_male['bday_own'].dt.month.astype(int)
df_male['m_year'] = df_male['bday_own'].dt.year.astype(int)
df_male['m_hour'] = 12
df_male['m_min'] = 15
df_male['m_tzone'] = 5.5

df_male['bday_sp']= pd.to_datetime(df_male['bday_sp'], errors = 'coerce')
df_male['f_day'] = df_male['bday_sp'].dt.day.astype(int)
df_male['f_month'] = df_male['bday_sp'].dt.month.astype(int)
df_male['f_year'] = df_male['bday_sp'].dt.year.astype(int)
df_male['f_hour'] = 12
df_male['f_min'] = 10
df_male['f_tzone'] = 5.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

In [9]:
df_male

Unnamed: 0,personLabel,spouse1Label,place_ownLabel,place_spLabel,marriage_date_st,marriage_date_en,bday_own,bday_sp,dday_own,dday_sp,genderLabel,m_day,m_month,m_year,m_hour,m_min,m_tzone,f_day,f_month,f_year,f_hour,f_min,f_tzone
6,Abhishek Bachchan,Aishwarya Rai,Mumbai,Mangalore,2007-04-20T00:00:00Z,,1976-02-05 00:00:00+00:00,1973-11-01 00:00:00+00:00,,,male,5,2,1976,12,15,5.5,1,11,1973,12,10,5.5
9,Nikhil Nanda,Shweta Bachchan Nanda,Ludhiana,Mumbai,1997-02-16T00:00:00Z,,1974-03-18 00:00:00+00:00,1974-03-17 00:00:00+00:00,,,male,18,3,1974,12,15,5.5,17,3,1974,12,10,5.5
11,Manoj K. Jayan,Urvashi,Kottayam district,Thiruvananthapuram,2000-01-01T00:00:00Z,2008-01-01T00:00:00Z,1966-03-15 00:00:00+00:00,1967-01-25 00:00:00+00:00,,,male,15,3,1966,12,15,5.5,25,1,1967,12,10,5.5
12,Goldie Behl,Sonali Bendre,Mumbai,Mumbai,2002-11-12T00:00:00Z,,1969-02-23 00:00:00+00:00,1975-01-01 00:00:00+00:00,,,male,23,2,1969,12,15,5.5,1,1,1975,12,10,5.5
15,Randhir Kapoor,Babita,Mumbai,Mumbai,1971-01-01T00:00:00Z,,1947-02-15 00:00:00+00:00,1948-04-20 00:00:00+00:00,,,male,15,2,1947,12,15,5.5,20,4,1948,12,10,5.5
18,Sathaar,Jayabharathi,Kerala,Erode,1979-01-01T00:00:00Z,1987-01-01T00:00:00Z,1952-05-25 00:00:00+00:00,1950-01-01 00:00:00+00:00,2019-09-17T00:00:00Z,,male,25,5,1952,12,15,5.5,1,1,1950,12,10,5.5
22,Siddharth Roy Kapur,Vidya Balan,Mumbai,Palakkad district,2012-12-14T00:00:00Z,,1974-08-02 00:00:00+00:00,1979-01-01 00:00:00+00:00,,,male,2,8,1974,12,15,5.5,1,1,1979,12,10,5.5
27,Sumanth,Keerthi Reddy,Hyderabad,Hyderabad,2004-01-01T00:00:00Z,2006-01-01T00:00:00Z,1975-02-09 00:00:00+00:00,1978-11-17 00:00:00+00:00,,,male,9,2,1975,12,15,5.5,17,11,1978,12,10,5.5
28,Ranvir Shorey,Konkona Sen Sharma,Jalandhar,New Delhi,2010-01-01T00:00:00Z,,1972-08-18 00:00:00+00:00,1979-12-03 00:00:00+00:00,,,male,18,8,1972,12,15,5.5,3,12,1979,12,10,5.5
31,Anup Soni,Juhi Babbar,Pune,Lucknow,2011-01-01T00:00:00Z,,1975-01-30 00:00:00+00:00,1979-07-20 00:00:00+00:00,,,male,30,1,1975,12,15,5.5,20,7,1979,12,10,5.5


In [10]:
#pip install geopy

In [16]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="astroML")
#location = geolocator.geocode("Delhi")
df_male["Coordinates_own"] = df["place_ownLabel"].apply(geolocator.geocode)
df_male["Coordinates_sp"] = df["place_spLabel"].apply(geolocator.geocode)

df_male['m_lat'] = df_male["Coordinates_own"].apply(lambda x: x.latitude if x != None else None)
df_male['m_lon'] = df_male["Coordinates_own"].apply(lambda x: x.longitude if x != None else None)

df_male['f_lat'] = df_male["Coordinates_sp"].apply(lambda x: x.latitude if x != None else None)
df_male['f_lon'] = df_male["Coordinates_sp"].apply(lambda x: x.longitude if x != None else None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [22]:
df_fmale = df[df['genderLabel'] =='female']
df_fmale = df_fmale.iloc[:-1,:]

df_fmale['bday_own']= pd.to_datetime(df_fmale['bday_own'], errors = 'coerce')
df_fmale['f_day'] = df_fmale['bday_own'].dt.day.astype(int)
df_fmale['f_month'] = df_fmale['bday_own'].dt.month.astype(int)
df_fmale['f_year'] = df_fmale['bday_own'].dt.year.astype(int)
df_fmale['f_hour'] = 12
df_fmale['f_min'] = 15
df_fmale['f_tzone'] = 5.5

df_fmale['bday_sp']= pd.to_datetime(df_fmale['bday_sp'], errors = 'coerce')
df_fmale['m_day'] = df_fmale['bday_sp'].dt.day.astype(int)
df_fmale['m_month'] = df_fmale['bday_sp'].dt.month.astype(int)
df_fmale['m_year'] = df_fmale['bday_sp'].dt.year.astype(int)
df_fmale['m_hour'] = 12
df_fmale['m_min'] = 10
df_fmale['m_tzone'] = 5.5

geolocator = Nominatim(user_agent="astroML", timeout=3)
df_fmale["Coordinates_own"] = df["place_ownLabel"].apply(geolocator.geocode)
df_fmale["Coordinates_sp"] = df["place_spLabel"].apply(geolocator.geocode)

df_fmale['f_lat'] = df_fmale["Coordinates_own"].apply(lambda x: x.latitude if x != None else None)
df_fmale['f_lon'] = df_fmale["Coordinates_own"].apply(lambda x: x.longitude if x != None else None)

df_fmale['m_lat'] = df_fmale["Coordinates_sp"].apply(lambda x: x.latitude if x != None else None)
df_fmale['m_lon'] = df_fmale["Coordinates_sp"].apply(lambda x: x.longitude if x != None else None)


In [29]:
#cleanup
df1 = df_male.append(df_fmale)

In [31]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 283 entries, 6 to 597
Data columns (total 29 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   personLabel       283 non-null    object             
 1   spouse1Label      283 non-null    object             
 2   place_ownLabel    261 non-null    object             
 3   place_spLabel     262 non-null    object             
 4   marriage_date_st  283 non-null    object             
 5   marriage_date_en  121 non-null    object             
 6   bday_own          283 non-null    datetime64[ns, UTC]
 7   bday_sp           283 non-null    datetime64[ns, UTC]
 8   dday_own          56 non-null     object             
 9   dday_sp           0 non-null      object             
 10  genderLabel       283 non-null    object             
 11  m_day             283 non-null    int32              
 12  m_month           283 non-null    int32              
 13  m_yea

In [32]:
#clean
#If year of marriage end date is same is year of death, then remove

df1['dday_own']= pd.to_datetime(df1['dday_own'], errors = 'coerce')
df1['dday_sp']= pd.to_datetime(df1['dday_sp'], errors = 'coerce')
df1['marriage_date_en']= pd.to_datetime(df1['marriage_date_en'], errors = 'coerce')

In [33]:
df1 = df1[df_male['dday_own'].dt.year != df_male['marriage_date_en'].dt.year]

In [39]:
df1 = df1[df1['dday_own'].dt.year != df1['marriage_date_en'].dt.year]
df1 = df1[df1['dday_sp'].dt.year != df1['marriage_date_en'].dt.year]

In [52]:
#Remove rows wilh missing coordinates
df1.isnull().sum()
df1 = df1.dropna(subset=['place_ownLabel','place_spLabel','m_lat','f_lat','m_lon','f_lon'])

In [72]:
#Astro Data

dfx = df1
for index, row in dfx.iterrows():
    
    url = 'https://json.astrologyapi.com/v1/match_making_report'

    data = dict(
     m_day=row['m_day'],
     m_month=row['m_month'],
     m_year=row['m_year'],
     m_hour=row['m_hour'],
     m_min=row['m_min'],
     m_lat=row['m_lat'],
     m_lon=row['m_lon'],
     m_tzone=row['m_tzone'],
    
     f_day=row['f_day'],
     f_month=row['f_month'],
     f_year=row['f_year'],
     f_hour=row['f_hour'],
     f_min=row['f_min'],
     f_lat=row['f_lat'],
     f_lon=row['f_lon'],
     f_tzone=row['f_tzone']
        )

    r = requests.post(url=url, data=data, auth=("xxxxx","xxxxx"))
    print(f'Status {r.status_code}')
    
    api = r.json()
    
    dfx.loc[index, 'ashtakoota_ststus'] = api['ashtakoota']['status']
    dfx.loc[index, 'ashtakoota_received_points'] = api['ashtakoota']['received_points']
    dfx.loc[index, 'manglik_status'] = api['manglik']['status']
    dfx.loc[index, 'manglik_male_percentage'] = api['manglik']['male_percentage']
    dfx.loc[index, 'manglik_female_percentage'] = api['manglik']['female_percentage']
    dfx.loc[index, 'rajju_dosha_status'] = api['rajju_dosha']['status']
    dfx.loc[index, 'vedha_dosha_status'] = api['vedha_dosha']['status']
    dfx.loc[index, 'conclusion'] = api['conclusion']['match_report']
    
    #print(r.request.body)
    
    #dfx['sum'] = dfx['m_lat'] + dfx['f_lon']
    #print(row['m_lat'], row['f_lon'], row['sum'])

Status 200


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200
Status 200

In [76]:
dfx.to_csv('horo.csv')