<a href="https://colab.research.google.com/github/k-vinamr/address_cleaning_python/blob/main/Address_cleaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Trying to clean address of rera**

In [1]:
pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [2]:
import numpy as np
import pandas as pd
from fuzzywuzzy import fuzz
import warnings as wng
import re
from tqdm import tqdm
from datetime import datetime, date, timedelta
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from ast import literal_eval
from IPython.display import display, Markdown, clear_output



### **Setting Environment**

In [3]:
# for ignoring warning
wng.filterwarnings('ignore')
# setting display size
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

### **Functions and Utilities**

custom print function

In [4]:
def newPrint(title, msg="", size=3, center=False):
    n = size
    if msg != "":
        title = title + ": "
    if center:
        return display(Markdown(n*'#' + ' <center> <font color=green>'+title+'</font>' +
                                ' <font color=red>'+'<b>'+'<em>'+str(msg).replace('\n','<br>')+'</em>'+'</b>'+'</font> </center>'))
    else:
        return display(Markdown(n*'#' + ' <font color=green>'+title+'</font>' +
                                ' <font color=red>'+'<b>'+'<em>'+str(msg).replace('\n','<br>')+'</em>'+'</b>'+'</font>'))

Func for taking count of all occurences of specific word in all address database


In [5]:
def fetch_words(frame, col, all_words=True):
    out = []
    for s in frame[col]:
        s=str(s)
        for ss in s.split():
            if all_words:
                try:
                    out.append(ss.upper().strip())
                except:
                    continue

    word_corpus = Counter(out)
    return word_corpus

Func to access all keys with given specifc value


In [6]:
def get_key(val,my_dict):
    out=[]
    for key, value in my_dict.items():
         if val == value:
              out.append(key)
    return out

Func to insert space between alpha-numeral


In [7]:
def insert_spaces(regex,str1):
    return ' '.join((re.sub(regex, ' \g<0> ', str1)).split())

Func to recognise dtype of word


In [8]:
def ifnum(z):
    return "Numeric" if len(re.findall(r'\d',z))!= 0 else "NonNumeric"

Func to fetch pincodes from address to a new column

In [9]:
def extract_pin(mdf,col,dest,reg):# mdf-dataframe, col-address col, dest-pinode, reg- re.pattern for pincode
    mdf[dest] = mdf[col].str.extract(reg)
    mdf[col] = mdf[col].str.replace(reg,'',regex=True)

### **Lists for removing and replacing char from addresses**

In [10]:
rem_list = ["EXTN","EXT","EXTENSION","NR","NEAR","DEHLI","DELLI","DELHI","DILLI","DELHI","DELHGI","DELH I","DELH","DILHI","DIHLI"]
plot_list = ["PLOT NOS","NOF","FNO","HNO","H NO","HNOE","HNOM","FLAT"]
street_list = ['STREET','STREETNO','STREET NO','STNO','GLII','GLI',"GALI","GALI NO","GN","GNO","GALIN","GALINO","G-NO","GALO","GALO NO","NOM","GALLI","GALLI NO","GAALI","GAALI NO","BUND GALI","BAND GALI","BAND GALLI","NOG","NO-G"]
plot_dict = {plot: "HOUSE NO" for plot in plot_list}
street_dict = {street: "STREET" for street in street_list}
loc_list = ['NAGAR','COLONY']
FLOOR_list = ['FLOOR','FLR','FLOR','GF','FF','SF','TF','UGF','LGF','IF','IIF','IIIF','G F','F F','S F','T F','IV F','TH F','UG F','LG F','FL']
repl_dict0 = {
            "FEET":"FUTA","FOOTA":"FUTA","FEETA":"FUTA","FOOT":"FUTA","FOOTA ROAD":"FUTA","FOOTA RAOD":"FUTA","FOOT ROAD":"FUTA","FOOTA RODA":"FUTA"\
            ,"FOOTA ROD":"FUTA","FEET ROAD":"FUTA","FEET RAOD":"FUTA","FOOT RD":"FUTA","FEET RD":"FUTA","FOOTA RD":"FUTA","FEETA RD":"FUTA","FUT":"FUTA",\
            "FUT ROAD":"FUTA","FUT ROD":"FUTA","FUTA ROAD":"FUTA","FUTA RAOD":"FUTA","FIT ROAD":"FUTA","FITA ROAD":"FUTA","FITA RAOD":"FUTA","FITA RD":"FUTA"\
            ,"FUTA":"FUTA","PARAK":"PARK","PRAK":"PARK","PRK":"PARK","PARK":"PARK","GRDN":"GARDEN","GARDN":"GARDEN","GARDAN":"GARDEN","GRDEN":"GARDEN",\
            "GRDAN":"GARDEN","GERDEN":"GARDEN","GERDAN":"GARDEN","GDN":"GARDEN","GARDEN":"GARDEN","GADREN":"GARDEN","COLNY":"COLONY","COLINY":"COLONY",\
            "CLONY":"COLONY","CALONY":"COLONY","CLY":"COLONY","CLNY":"COLONY","CNY":"COLONY","COLONI":"COLONY","COLNIY":"COLONY","COLONIY":"COLONY",\
            "COLONIY":"COLONY","CALONEY":"COLONY","COLONE":"COLONY","COLONU":"COLONY","CLONEY":"COLONY","KOLONY":"COLONY","KALONI":"COLONY",\
            "COLONY":"COLONY","VIHR":"VIHAR","VHR":"VIHAR","VHAR":"VIHAR","VEHAR":"VIHAR","VIHAAR":"VIHAR","VAHAR":"VIHAR","VIHAR":"VIHAR",\
            "ENCLAV":"ENCLAVE","ENCALAVE":"ENCLAVE","ENCALAV ":"ENCLAVE","ENCALAV ":"ENCLAVE","INCLAVE":"ENCLAVE","ENCLAVE":"ENCLAVE",\
            "DILHAD":"DILSHAD","DELSHAD":"DILSHAD","DLSHAD":"DILSHAD","DILSHAD":"DILSHAD","BAG ":"BAGH","BAHG ":"BAGH","BAHGH":"BAGH",\
            "BHAG":"BAGH","BAAG ":"BAGH","BAAGH":"BAGH","BHAAG":"BAGH","BAGH":"BAGH","SHAHADARA":"SHAHDARA","SHADARA":"SHAHDARA","SADARA":"SHAHDARA",\
            "SHADRA":"SHAHDARA","SADRA":"SHAHDARA","SHAHDRA":"SHAHDARA"
            }
repl_dict1 = {
            'AP':'APARTMENTS','ADD':'ADDRESS','APT':'APARTMENTS','APPT':'APARTMENTS','ADMN':'ADMINISTRATION','APTT':'APARTMENTS','APART':'APARTMENTS'\
            ,'APARTMANT':'APARTMENTS','AHINSHAVATIKA':'AHINSA VATIKA','BL':'BLOCK','BLK':'BLOCK','BLOK':'BLOCK','BLOC':'BLOCK','BLCOK':'BLOCK','CLY':'COLONY',\
            'CGHS':'APARTMENTS','CHILL':'CHILLA','CICEK':'VIVEK','COMPLES':'COMPLEX','DASH':'RAVIDAS','DABAL':'DOUBLE','DALLUPRA':'DALLUPURA',\
            'DHARMSHAL':'DHARMSHALA','DHARAMSALA':'DHARMSHALA','DHARMASHALA':'DHARMSHALA','EXT':'EXTENSION','ENV':'ENCLAVE','ECV':'ENCLAVE','ENC':'ENCLAVE'\
            ,'EXTN':'EXTENSION','ENCV':'ENCLAVE','ENCL':'ENCLAVE','EXTEN':'EXTENSION','EXTENED':'EXTENDED','ENCALVE':'ENCLAVE','FL':'FLOOR','FR':'FIRST',\
            'FT':'FOOTA','FLR':'FLOOR','FLO':'FLOOR','FNO':'FLAT NO','FLT':'FLAT','FLOR':'FLOOR','FALT':'FLAT','FLOO':'FLOOR','FLOT':'FLAT','FLOAR':'FLOOR',\
            'FUTTA':'FOOTA','FLORE':'FLOOR','FLATE':'FLAT','FLATNO':'FLAT NO','GD':'GHAROLI DAIRY','GF':'GROUND FLOOR','GRD':'GROUND','GOP':'GOPAL',\
            'GOVT':'GOVERNMENT','GHRO':'GHAROLI','GORAK':'GORAKH','GALINO':'GALI NO','GHALORI':'GHAROLI','GITANJALI':'GEETANJALI','HNO':'HOUSE NO',\
            'HIMALYA':'HIMALAYA','HARGOBIND':'HARGOVIND','IP':'INDRAPRASTHA','JAMUNA':'YAMUNA','KH':'KHASRA','KS':'KHASRA','KN':'KHASRA','KHNO':'KHASRA','KONDLY':'KONDLI',\
            'LG':'LOWERGROUND','LL':'LOWERGROUND','LTD':'LIMITED','LIF':'LIG','LOOR':'FLOOR','MAYU':'MAYUR','MAND':'MANDIR','MANDER':'MANDIR','MAHABIR':'MAHAVIR'\
            ,'MANWABAN':'MANBHAWAN','MULTISTORE':'MULTI STOREY','MANSAROVER':'MANSAROVAR','NEWNO':'NEW NO','NUBMER':'NUMBER','NALAPAAR':'NALA PAAR',\
            'NAGARUJANA':'NAGARJUNA','OPP':'OPPOSITE','OTH':'OTHER','OLDNO':'OLD NO','OPPSITE':'OPPOSITE','PH':'PHASE','PKT':'POCKET','PNO':'PLOT NO','PCK':'POCKET','PVT':'PRIVATE','PROP':'PROPERTY','POKT':'POCKET','POCK':'POCKET','PLOY':'PLOT','POLT':'PLOT','PLOR':'PLOT','PRAK':'PARK','PKTA':'POCKET A','PLOAT':'PLOT','PCKET':'POCKET','PODHO':'PAUDHE','PLAOT':'PLOT','PLOTNO':'PLOT NO','PUADHE':'PAUDHE','PARVANA':'PARWANA','PANCHEEL':'PANCHSHEEL','PANSHEEL':'PANCHSHEEL','PRIYADARS':'PRIYADARSHINI','QTR':'QUARTERS','QRTS':'QUARTERS','QARTR':'QUARTERS','QUARTERS':'QUARTERS','RAJBLOC':'RAJ BLOCK','SEC':'SECTION','SAUD':'SAUDAGAR','SITA':'SITARAM','STNO':'STREET NO','SHADRA':'SHAHDARA','SRIRAM':'SHRIRAM','SHAHDRA':'SHAHDARA','SHEKHAR':'SHEKHER','SOCITIES':'SOCIETY','SADBHAWNA':'SADHBHAWNA','SADHBHAVNA':'SADHBHAWNA','SHIVMANDIR':'SHIV MANDIR','SHREEGANESH':'SHRIGANESH','SHALIMARPARK':'SHALIMAR PARK','TF':'THIRD FLOOR','TYP':'TYPE','TYPR':'TYPE','TRIVANI':'TRIVENI','TAKSHILA':'TAKSHSHILA','TECHNOLOG':'TECHNOLOGY','UG':'UPPERGROUND','UGF':'UPPERGROUND FLOOR','UPR':'UPPER','UPPAR':'UPPER','UCHEPAR':'UNCHEPAR','UTTRANCHA':'UTTRANCHAL','YUDHISTER':'YUDHISHTHIR','YUDISHTER':'YUDHISHTHIR','YUDHESTAR':'YUDHISHTHIR','YUDHISHTIR':'YUDHISHTHIR','YUDHISTHIR':'YUDHISHTHIR','C G H S':'APARTMENTS','GAUTAM SSHB GAMIL COM':' ','SHELLYKHATHURIA1994 GMAIL COM':' ','IGLDELHI GMAIL COM':' ','RATURI PRADEEP26 GMAIL COM':' ','I P':'INDRAPRASTHA','I P EXTENSION':'INDRAPRASTHA EXTENSION','PARK END':'PARKEND','EAST END':'EASTEND','VIGYAN LOK':'VIGYANLOK','ANAND LOK':'ANANDLOK','SANCHAR LOK':'SANCHARLOK','VIGYAPAN LOK':'VIGYAPANLOK','NAV JAGRITI':'NAVJAGRITI','NAV RACHNA':'NAVRACHNA','NAV BHARAT':'NAVBHARAT','SHANKAR PUR':'SHANKARPUR','TAHIR PUR':'TAHIRPUR','HASAN PUR':'HASANPUR','SEELAM PUR':'SEELAMPUR','SHIV PURI':'SHIVPURI','GURGA PURI':'DURGAPURI','TRILOK PURI':'TRILOKPURI','TIRLOK PURI':'TRILOKPURI','JAGAT PURI':'JAGATPURI','DWARIKA PURI':'DWARKAPURI','RIVER VIEW':'RIVERVIEW','LAKE VIEW':'LAKEVIEW','PARK VIEW':'PARKVIEW','SRI RAM':'SHRIRAM','SRI KRISHNA':'SHRIKRISHNA','SRI GURU':'SHRIGURU'
            }
repl_dict2 = {'APARTMENT':'APARTMENTS','BAZAR':'BAZAAR','RAMDAS':'RAMDASS','UNCHE':'UNCHEPAR','YOJANA':'YOJNA','SRESHTHA':'SHRESHTHA','DEPT':'DEPARTMENT','ENGINEERS':'ENGINEER','BARI':'BADI','RAVIDAS':'RAVIDASS','AGARSEN':'AGRASEN','MIX':'MIXED','NAVNITI':'NAVNEETI','GOURAV':'GAURAV','NOA':'NO','IGESI':'ESI','SIDHARTH':'SIDDHARTH','PARSHVA':'PARSHVANATH','MANAVASATHALI':'MANAVSTHALI','ASHISHWANG':'ASHISHWONG','KALLOL':'KALOL','TEACHER':'TEACHERS','VIKALPA':'VIKALP','TRILOK':'TRILOKYA','GRD':'GROUND','COMPUTER':'COMPUTERS','SISHU':'SHISHU','POLT':'PLOT','ASSOCIATED':'ASSOCIATE','SHEKHER':'SHEKHAR','BLK':'BLOCK','SHAHDRA':'SHAHDARA'}
repl_dict3 = {'F/F':'FIRST','S/F':'SECOND','T/F':'THIRD','IST':'FIRST','3 ND':'THIRD','II ND':'SECOND','U G':'UPPERGROUND','GRD':'GROUND','BLOCK NO':'BLOCK','8 TH':'EIGHTH','11 TH':'ELEVENTH','15 TH':'FIFTEENTH','5 TH':'FIFTH','VF':'FIFTH FLOOR','V F':'FIFTH FLOOR','1 ST':'FIRST','IF':'FIRST FLOOR','I F':'FIRST FLOOR','FF':'FIRST FLOOR','F F':'FIRST FLOOR','FL':'FLOOR','FLR':'FLOOR','FLOR':'FLOOR','48 TH':'FORTYEIGHTH','14 TH':'FOURTEENTH','4 TH':'FOURTH','IVF':'FOURTH FLOOR','IV F':'FOURTH FLOOR','GF':'GROUND FLOOR','G F':'GROUND FLOOR','FALT':'HOUSE NO','FLT':'HOUSE NO','FLOT':'HOUSE NO','COMPOUNDNO':'HOUSE NO','FNO':'HOUSE NO','FLATNO':'HOUSE NO','HNO':'HOUSE NO','ROOMNO':'HOUSE NO','FL NO':'HOUSE NO','FLAT NO':'HOUSE NO','H NO':'HOUSE NO','F NO':'HOUSE NO','ROOM NO':'HOUSE NO','FLAT':'HOUSE NO','HN':'HOUSE NO','HOUSENO':'HOUSE NO','PROPNO':'HOUSE NO','SHOPNO':'HOUSE NO','FTNO':'HOUSE NO','SNO':'HOUSE NO','PVTNO':'HOUSE NO','FLATE':'HOUSE NO','HO NOB':'HOUSE NO B','H NOC':'HOUSE NO C','KHASRA NO':'KHASRA','KH NO':'KHASRA','K NO':'KHASRA','KILLA NO':'KHASRA','LGF':'LOWERGROUND FLOOR','LG F':'LOWERGROUND FLOOR','L GF':'LOWERGROUND FLOOR','NEW NO':'NEW','19 TH':'NINETEENTH','9 TH':'NINTH','90 TH':'NINTIETH','OLDNO':'OLD','OLD NO':'OLD','PARK NO':'PARK','PLOTNO':'PLOT','PLOT NO':'PLOT','P NO':'PLOT','PNO':'PLOT','POLT':'PLOT','PN':'PLOT','PLOR':'PLOT','PLOY':'PLOT','PLOAT':'PLOT','PLAOT':'PLOT','PLT':'PLOT','2 ND':'SECOND','IIF':'SECOND FLOOR','II F':'SECOND FLOOR','SF':'SECOND FLOOR','S F':'SECOND FLOOR','7 TH':'SEVENTH','VIIF':'SEVENTH FLOOR','VII F':'SEVENTH FLOOR','16 TH':'SIXTEENTH','6 TH':'SIXTH','VIF':'SIXTH FLOOR','VI F':'SIXTH FLOOR','60 TH':'SIXTIETH','GALNO':'STREET','GALINO':'STREET','STNO':'STREET','GALI NO':'STREET','STREET NO':'STREET','LANE NO':'STREET','GALI':'STREET','GLII':'STREET','GLI':'STREET','GNO':'STREET','GN':'STREET','G NO':'STREET','STREETNO':'STREET','GATENO':'STREET','10 TH':'TENTH','3 RD':'THIRD','IIIF':'THIRD FLOOR','III F':'THIRD FLOOR','TF':'THIRD FLOOR','T F':'THIRD FLOOR','THF':'THIRD FLOOR','TH F':'THIRD FLOOR','RDFLOOR':'THIRD FLOOR','THFLOOR':'THIRD FLOOR','13 TH':'THIRTEENTH','30 TH':'THIRTIETH','38 TH':'THIRTYEIGHTH','TOWER NO':'TOWER','12 TH':'TWELFTH','UGF':'UPPERGROUND FLOOR','UG F':'UPPERGROUND FLOOR','U GF':'UPPERGROUND FLOOR','11 ST':'ELEVENTH','12 ND':'TWELFTH','13 RD':'THIRTEENTH','17 TH':'SEVENTEENTH','18 TH':'EIGHTEENTH','20 TH':'TWENTIETH','21 ST':'TWENTYFIRST','22 ND':'TWENTYSECOND','23 RD':'TWENTYTHIRD','24 TH':'TWENTYFOURTH','25 TH':'TWENTYFIFTH','26 TH':'TWENTYSIXTH','27 TH':'TWENTYSEVENTH','28 TH':'TWENTYEIGHTH','29 TH':'TWENTYNINTH','31 ST':'THIRTYFIRST','32 ND':'THIRTYSECOND','33 RD':'THIRTYTHIRD','34 TH':'THIRTYFOURTH','35 TH':'THIRTYFIFTH','36 TH':'THIRTYSIXTH','37 TH':'THIRTYSEVENTH','39 TH':'THIRTYNINTH','40 TH':'FORTIETH','41 ST':'FORTYFIRST','42 ND':'FORTYSECOND','43 RD':'FORTYTHIRD','44 TH':'FORTYFOURTH','45 TH':'FORTYFIFTH','46 TH':'FORTYSIXTH','47 TH':'FORTYSEVENTH','49 TH':'FORTYNINTH','50 TH':'FIFTIETH','51 ST':'FIFTYFIRST','52 ND':'FIFTYSECOND','53 RD':'FIFTYTHIRD','54 TH':'FIFTYFOURTH','55 TH':'FIFTYFIFTH','56 TH':'FIFTYSIXTH','57 TH':'FIFTYSEVENTH','58 TH':'FIFTYEIGHTH','59 TH':'FIFTYNINTH','61 ST':'SIXTYFIRST','62 ND':'SIXTYSECOND','63 RD':'SIXTYTHIRD','64 TH':'SIXTYFOURTH','65 TH':'SIXTYFIFTH','66 TH':'SIXTYSIXTH','67 TH':'SIXTYSEVENTH','68 TH':'SIXTYEIGHTH','69 TH':'SIXTYNINTH','70 TH':'SEVENTIETH','71 ST':'SEVENTYFIRST','72 ND':'SEVENTYSECOND','73 RD':'SEVENTYTHIRD','74 TH':'SEVENTYFOURTH','75 TH':'SEVENTYFIFTH','76 TH':'SEVENTYSIXTH','77 TH':'SEVENTYSEVENTH','78 TH':'SEVENTYEIGHTH','79 TH':'SEVENTYNINTH','80 TH':'EIGHTIETH','81 ST':'EIGHTYFIRST','82 ND':'EIGHTYSECOND','83 RD':'EIGHTYTHIRD','84 TH':'EIGHTYFOURTH','85 TH':'EIGHTYFIFTH','86 TH':'EIGHTYSIXTH','87 TH':'EIGHTYSEVENTH','88 TH':'EIGHTYEIGHTH','89 TH':'EIGHTYNINTH','90 TH':'NINETIETH','91 ST':'NINETYFIRST','92 ND':'NINETYSECOND','93 RD':'NINETYTHIRD','94 TH':'NINETYFOURTH','95 TH':'NINETYFIFTH','96 TH':'NINETYSIXTH','97 TH':'NINETYSEVENTH','98 TH':'NINETYEIGHTH','99 TH':'NINETYNINTH','100 TH':'HUNDREDTH','1ST':'FIRST','2ND':'SECOND','3RD':'THIRD','4TH':'FOURTH','5TH':'FIFTH','6TH':'SIXTH','7TH':'SEVENTH','8TH':'EIGHTH','9TH':'NINTH','10TH':'TENTH','11TH':'ELEVENTH','12TH':'TWELFTH','13TH':'THIRTEENTH','14TH':'FOURTEENTH','15TH':'FIFTEENTH','16TH':'SIXTEENTH','17TH':'SEVENTEENTH','18TH':'EIGHTEENTH','19TH':'NINETEENTH','20TH':'TWENTIETH','21ST':'TWENTYFIRST','22ND':'TWENTYSECOND','23RD':'TWENTYTHIRD','24TH':'TWENTYFOURTH','25TH':'TWENTYFIFTH','26TH':'TWENTYSIXTH','27TH':'TWENTYSEVENTH','28TH':'TWENTYEIGHTH','29TH':'TWENTYNINTH','30TH':'THIRTIETH','31ST':'THIRTYFIRST','32ND':'THIRTYSECOND','33RD':'THIRTYTHIRD','34TH':'THIRTYFOURTH','35TH':'THIRTYFIFTH','36TH':'THIRTYSIXTH','37TH':'THIRTYSEVENTH','38TH':'THIRTYEIGHTH','39TH':'THIRTYNINTH','40TH':'FORTIETH','41ST':'FORTYFIRST','42ND':'FORTYSECOND','43RD':'FORTYTHIRD','44TH':'FORTYFOURTH','45TH':'FORTYFIFTH','46TH':'FORTYSIXTH','47TH':'FORTYSEVENTH','48TH':'FORTYEIGHTH','49TH':'FORTYNINTH','50TH':'FIFTIETH','51ST':'FIFTYFIRST','52ND':'FIFTYSECOND','53RD':'FIFTYTHIRD','54TH':'FIFTYFOURTH','55TH':'FIFTYFIFTH','56TH':'FIFTYSIXTH','57TH':'FIFTYSEVENTH','58TH':'FIFTYEIGHTH','59TH':'FIFTYNINTH','60TH':'SIXTIETH','61TH':'SIXTYFIRST','62ND':'SIXTYSECOND','63RD':'SIXTYTHIRD','64TH':'SIXTYFOURTH','65TH':'SIXTYFIFTH','66TH':'SIXTYSIXTH','67TH':'SIXTYSEVENTH','68TH':'SIXTYEIGHTH','69TH':'SIXTYNINTH','70TH':'SEVENTIETH','71ST':'SEVENTYFIRST','72ND':'SEVENTYSECOND','73RD':'SEVENTYTHIRD','74TH':'SEVENTYFOURTH','75TH':'SEVENTYFIFTH','76TH':'SEVENTYSIXTH','77TH':'SEVENTYSEVENTH','78TH':'SEVENTYEIGHTH','79TH':'SEVENTYNINTH','80TH':'EIGHTIETH','81ST':'EIGHTYFIRST','82ND':'EIGHTYSECOND','83RD':'EIGHTYTHIRD','84TH':'EIGHTYFOURTH','85TH':'EIGHTYFIFTH','86TH':'EIGHTYSIXTH','87TH':'EIGHTYSEVENTH','88TH':'EIGHTYEIGHTH','89TH':'EIGHTYNINTH','90TH':'NINETIETH','91ST':'NINETYFIRST','92ND':'NINETYSECOND','93RD':'NINETYTHIRD','94TH':'NINETYFOURTH','95TH':'NINETYFIFTH','96TH':'NINETYSIXTH','97TH':'NINETYSEVENTH','98TH':'NINETYEIGHTH','99TH':'NINETYNINTH','100TH':'HUNDREDTH'}
repl_dict = {}
repl_dict.update(repl_dict0)
repl_dict.update(repl_dict1)
repl_dict.update(repl_dict2)
repl_dict.update(repl_dict3)
repl_dict.update(plot_dict)
repl_dict.update(street_dict)
len(repl_dict)

592

Func to remove unwanted words from corpus


In [11]:
def remov_list(frame, col):
    frame[col] = frame[col].astype(str)
    for word in tqdm(rem_list, desc="ADDRESS processed", colour='green'):
        frame[col] = frame[col].str.replace(f'{word}', "")

Check for word ID


In [12]:
def word_id(word):
    word = word.upper()
    if word in plot_list:
        return 'PLOT MARKER'
    elif word in street_list:
        return 'STREET MARKER'
    elif word in FLOOR_list:
        return 'FLOOR MARKER'

To fetch words before and after a word in a string


In [13]:
def fetch_word_bef_aft_string(kw,str1,mode=0):
    b = ''
    a = ''
    loc = ''
    try:

        ix = str1.find(kw)
        l = len(kw)
        ch_b = ''
        ch_a = ''

        if ix == -1:
            b = ''
            a = ''
            loc = 'N'
        elif l == len(str1):
            b = ''
            a = ''
            loc = 'S'
        elif ix == 0:
            b = ''
            ch_a = str1[ix+l]
            loc = 'S'
            if ch_a == ' ':
                a = kw + ' ' + str1[ix+l:].split()[0]
            else:
                if str1.find(' ') != -1:
                    a = kw + str1[ix+l:str1.find(' ')]
                else:
                    a = kw + str1[ix+l:]
        elif ix + l == len(str1):
            a = ''
            loc = 'E'
            ch_b = str1[ix-1]
            if ch_b == ' ':
                b = str1[:ix].split()[-1] + ' ' + kw
            else:
                if str1[:ix].rfind(' ') != -1:
                    b = str1[str1[:ix].rfind(' ')+1:ix] + kw
                else:
                    b = str1[:ix]+kw
        else:
            ch_b = str1[ix-1]
            ch_a = str1[ix+l]
            loc = 'M'
            if ch_a == ' ':
                if ch_b == ' ':
                    b = b = str1[:ix].split()[-1] + ' ' + kw
                    a = kw + ' ' + str1[ix+l:].split()[0]
                else:
                    b = str1[str1[:ix].rfind(' ')+1:ix] + kw
                    a = str1[str1[:ix].rfind(' ')+1:ix] + \
                        kw + ' ' + str1[ix+l:].split()[0]
            else:
                if ch_b == ' ':
                    b = str1[:ix].split()[-1] + ' ' + kw + \
                        str1[ix+l:str1[ix+l:].find(' ')+ix+l]
                    a = kw + str1[ix+l:str1[ix+l:].find(' ')+ix+l]
                else:
                    b = str1[str1[:ix].rfind(' ')+1:ix] + kw + \
                        str1[ix+l:str1[ix+l:].find(' ')+ix+l]
                    a = str1[str1[:ix].rfind(' ')+1:ix] + kw + \
                        str1[ix+l:str1[ix+l:].find(' ')+ix+l]
    except:
        pass

    if mode==1:
        return b, a, loc
    else:
        return b, a

To fetch address markers from an address

In [14]:
############################ Fetch House number ############################
def fetch_PLOT(z):
    pattern = re.compile(r"(\s+([0-9]+\s+)+)", re.IGNORECASE)
    z = "temp "+z
    extracted_substring = re.search(pattern, z)
    if extracted_substring == None:
        return None
    else:
        extracted_substring = extracted_substring.group()
    return extracted_substring


############################ Fetch Floor ############################
def fetch_FLOOR(add):
    try:
        if fetch_word_bef_aft_string('FLOOR',add,mode=1)[2]!='S':
            out = fetch_word_bef_aft_string('FLOOR',add)[0]
            return out.split()[0], ' '.join([x for x in add.split() if x not in (out.split())])
        else:
            out = fetch_word_bef_aft_string('FLOOR',add)[1]
            return out.split()[-1], ' '.join([x for x in add.split() if x not in (out.split())])
    except:
        return None,' '.join([x for x in add.split() if x != 'FLOOR'])

############################ Fetch Street ############################
def fetch_STREET(add):
    try:
        out = fetch_word_bef_aft_string('STREET',add)[1]
        if (out.split()[-1]).isdigit()==True:
            return out.split()[-1], ' '.join([x for x in add.split() if x not in (out.split())])
        else:
            out2 = fetch_word_bef_aft_string('STREET',add)[0]
            return out2.split()[0], ' '.join([x for x in add.split() if x not in (out2.split())])

    except:
        return None,' '.join([x for x in add.split() if x != 'STREET'])

############################ Fetch Pocket ############################
def fetch_POCKET(add):
    try:
        out = fetch_word_bef_aft_string('POCKET',add)[1]
        return out.split()[-1], ' '.join([x for x in add.split() if x not in (out.split())])
    except:
        return None,' '.join([x for x in add.split() if x != 'POCKET'])
############################ Fetch Block ############################
def fetch_BLOCK(add):
    try:
        out = fetch_word_bef_aft_string('BLOCK',add)[1]
        if len(out.split()[-1])<=2:
            return out.split()[-1], ' '.join([x for x in add.split() if x not in (out.split())])
        else:
            out2 = fetch_word_bef_aft_string('BLOCK',add)[0]
            return out2.split()[0], ' '.join([x for x in add.split() if x not in (out2.split())])
    except:
        return None,' '.join([x for x in add.split() if x != 'BLOCK'])

### **Main Code**

In [15]:
df = pd.read_csv('rera_govind.csv')
df["ADDRESS_ORG"]=df['project_address'].apply(lambda z: str(z).upper())
df.drop(['project_address'],axis=1,inplace=True)
df.head()

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG
0,Data_input_two_716500,PARAS LIFESTYLES PVT LTD,Company,PARAS GULAB VATIKA PHASE II,P-SEH-17-039,Residential/Group Housing,,Sehore,,,,,9826054143,,30-07-2016,30-09-2018,5200.0,,,,,,,,,,,Adjacent to Paras Emperor E-8 Extension Bawadian Kalan Bhopal � 462026,26.0,,,info@paraslifestyles.com,Ongoing,Madhya Pradesh,,,132.0,,,,,Consultants of Project,Sehore,1. Paras Lifestyles Pvt. Ltd 2. Mr. Sudarshan Rai & Mr. Anil Rai,,,,,,,KHASRA NOS. 851/1/2/1 851/2 852/2 853/1 873/2/2 877/2 AND 879/2KH TOTALLY ADMEASURING 1.155 HECTARE (PARAS LIFESTYLES PVT LTD) AND KHASRA NOS. 852/1 873/3KH 874/5 876/1 877/1 878/1 AND 879/1 TOTALLY ADMEASURING 1.154 HECTARE =11 540 (SUDARSHAN RAI & ANIL RAI)
1,Data_input_383000,51 MALL,Partnership Firm,51 MALL 2,P-IND-21-2993,,,Indore,,,,,9826051989,,4/1/2021,4/1/2024,1820.0,,,,,,,180.69,18200000.0,,,123 Palika Plaza Ist Floor MTH Compound Indore,36.0,,,theoutdoorindia@gmail.com,New,Madhya Pradesh,,,38.0,,,,,,Indore,SELF,,,,,,,PLOT NO. 5 SCHEME NO. 51 AVANTIKA INDORE
2,Data_input_two_410000,MAA INFRASTRUCTURE,Partnership Firm,RUDRANSH VIHAR,P-OTH-23-3839,,,Badnawar,,,,,9617123456,,15-06-2022,24-05-2025,1682.2,,,,,,,89.92,16822000.0,,,220 VIP paraspar nagar Indore (M.P.)- 452012,35.0,,,maainfrastructure3@gmail.com,New,Madhya Pradesh,,,137.0,,,,,,Dhar,hassan panwala' 'Gendalal mukati' ' kutbuddin hussain,,,,,,,KHASRA NO. PART OF 1397/1/1/2 1400 1398/1 GRAM KHEDA TEHSIL BADNAWER DISTRICT DHAR
3,Data_input_two_428000,M/S JAISHAKTI HOMES,Partnership Firm,JAI SHAKTI HOMES PHASE 4,P-HRD-18-1891,,,Handiya,,,,,9425042452,,22-06-2016,8/8/2019,2000.7,,,,,,,187.07,20007000.0,,,GIRJA SHANKAR NAGAR NEAR BYPASS ROAD HARDAS,38.0,,,matushree@yahoo.com,Ongoing,Madhya Pradesh,,,78.0,,,,,,Harda,Mr. Vivek Kumar Agarwal and Mr. Shobhram Gour,,,,,,,SURVEY NO. 164/5 164/6 160/19 160/34 160/35 VILLAGE GRAM HARDA KHAS TEHSIL HARDA DISTRICT HARDA (M.P.)
4,Data_input_two_654000,HARISH VIJAYVARGIYA,Individual,HARI VATIKA-02,P-MNS-22-3251,,,Mandsaur,,,,,8085577777,,12/2/2020,12/1/2023,505.4,,,,,,,20.0,5054000.0,,,14 Sudama Nagar Scheme No. 01 Mandsaur M.P. 458001,35.0,,,breez.harish@gmail.com,New,Madhya Pradesh,,,78.0,,,,,,Mandsaur,Mr. Harish S/o Premchandraji Vijayvargiya' ' Mr. Surendra Kumar S/o Hariramji' ' Mr. Radheshyam S/o Hariramji' ' Mrs. Dakha Bai W/o Hariramji,,,,,,,SURVEY NO. 231/1 233 AT VILLAGE-TODI TEHSIL AND DIST.- MANDSAUR M.P.


In [16]:
for c in df.select_dtypes(float):
    df[c] = df[c].fillna(0)

############################ Inserting space between numerals and characters ############################
df['ADDRESS'] = df['ADDRESS_ORG'].apply(lambda x: insert_spaces(r'[0-9]+',x))

df.head()

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG,ADDRESS
0,Data_input_two_716500,PARAS LIFESTYLES PVT LTD,Company,PARAS GULAB VATIKA PHASE II,P-SEH-17-039,Residential/Group Housing,0.0,Sehore,,,,0.0,9826054143,,30-07-2016,30-09-2018,5200.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,Adjacent to Paras Emperor E-8 Extension Bawadian Kalan Bhopal � 462026,26.0,0.0,,info@paraslifestyles.com,Ongoing,Madhya Pradesh,,,132.0,0.0,,,,Consultants of Project,Sehore,1. Paras Lifestyles Pvt. Ltd 2. Mr. Sudarshan Rai & Mr. Anil Rai,0.0,0.0,0.0,0.0,,,KHASRA NOS. 851/1/2/1 851/2 852/2 853/1 873/2/2 877/2 AND 879/2KH TOTALLY ADMEASURING 1.155 HECTARE (PARAS LIFESTYLES PVT LTD) AND KHASRA NOS. 852/1 873/3KH 874/5 876/1 877/1 878/1 AND 879/1 TOTALLY ADMEASURING 1.154 HECTARE =11 540 (SUDARSHAN RAI & ANIL RAI),KHASRA NOS. 851 / 1 / 2 / 1 851 / 2 852 / 2 853 / 1 873 / 2 / 2 877 / 2 AND 879 / 2 KH TOTALLY ADMEASURING 1 . 155 HECTARE (PARAS LIFESTYLES PVT LTD) AND KHASRA NOS. 852 / 1 873 / 3 KH 874 / 5 876 / 1 877 / 1 878 / 1 AND 879 / 1 TOTALLY ADMEASURING 1 . 154 HECTARE = 11 540 (SUDARSHAN RAI & ANIL RAI)
1,Data_input_383000,51 MALL,Partnership Firm,51 MALL 2,P-IND-21-2993,,0.0,Indore,,,,0.0,9826051989,,4/1/2021,4/1/2024,1820.0,0.0,0.0,,0.0,0.0,0.0,180.69,18200000.0,,0.0,123 Palika Plaza Ist Floor MTH Compound Indore,36.0,0.0,,theoutdoorindia@gmail.com,New,Madhya Pradesh,,,38.0,0.0,,,,,Indore,SELF,0.0,0.0,0.0,0.0,,,PLOT NO. 5 SCHEME NO. 51 AVANTIKA INDORE,PLOT NO. 5 SCHEME NO. 51 AVANTIKA INDORE
2,Data_input_two_410000,MAA INFRASTRUCTURE,Partnership Firm,RUDRANSH VIHAR,P-OTH-23-3839,,0.0,Badnawar,,,,0.0,9617123456,,15-06-2022,24-05-2025,1682.2,0.0,0.0,,0.0,0.0,0.0,89.92,16822000.0,,0.0,220 VIP paraspar nagar Indore (M.P.)- 452012,35.0,0.0,,maainfrastructure3@gmail.com,New,Madhya Pradesh,,,137.0,0.0,,,,,Dhar,hassan panwala' 'Gendalal mukati' ' kutbuddin hussain,0.0,0.0,0.0,0.0,,,KHASRA NO. PART OF 1397/1/1/2 1400 1398/1 GRAM KHEDA TEHSIL BADNAWER DISTRICT DHAR,KHASRA NO. PART OF 1397 / 1 / 1 / 2 1400 1398 / 1 GRAM KHEDA TEHSIL BADNAWER DISTRICT DHAR
3,Data_input_two_428000,M/S JAISHAKTI HOMES,Partnership Firm,JAI SHAKTI HOMES PHASE 4,P-HRD-18-1891,,0.0,Handiya,,,,0.0,9425042452,,22-06-2016,8/8/2019,2000.7,0.0,0.0,,0.0,0.0,0.0,187.07,20007000.0,,0.0,GIRJA SHANKAR NAGAR NEAR BYPASS ROAD HARDAS,38.0,0.0,,matushree@yahoo.com,Ongoing,Madhya Pradesh,,,78.0,0.0,,,,,Harda,Mr. Vivek Kumar Agarwal and Mr. Shobhram Gour,0.0,0.0,0.0,0.0,,,SURVEY NO. 164/5 164/6 160/19 160/34 160/35 VILLAGE GRAM HARDA KHAS TEHSIL HARDA DISTRICT HARDA (M.P.),SURVEY NO. 164 / 5 164 / 6 160 / 19 160 / 34 160 / 35 VILLAGE GRAM HARDA KHAS TEHSIL HARDA DISTRICT HARDA (M.P.)
4,Data_input_two_654000,HARISH VIJAYVARGIYA,Individual,HARI VATIKA-02,P-MNS-22-3251,,0.0,Mandsaur,,,,0.0,8085577777,,12/2/2020,12/1/2023,505.4,0.0,0.0,,0.0,0.0,0.0,20.0,5054000.0,,0.0,14 Sudama Nagar Scheme No. 01 Mandsaur M.P. 458001,35.0,0.0,,breez.harish@gmail.com,New,Madhya Pradesh,,,78.0,0.0,,,,,Mandsaur,Mr. Harish S/o Premchandraji Vijayvargiya' ' Mr. Surendra Kumar S/o Hariramji' ' Mr. Radheshyam S/o Hariramji' ' Mrs. Dakha Bai W/o Hariramji,0.0,0.0,0.0,0.0,,,SURVEY NO. 231/1 233 AT VILLAGE-TODI TEHSIL AND DIST.- MANDSAUR M.P.,SURVEY NO. 231 / 1 233 AT VILLAGE-TODI TEHSIL AND DIST.- MANDSAUR M.P.


In [17]:
df['ADDRESS_ORG'] = df['ADDRESS_ORG'].apply(lambda x: x.replace("\\","/"))
df["ADDRESS_ORG"] = df["ADDRESS_ORG"].apply(lambda z:re.sub('[^A-Za-z0-9-/]', ' ', str(z)))
df["ADDRESS"] = df["ADDRESS"].apply(lambda z:re.sub('[^A-Za-z0-9]', ' ', str(z)))
df["ADDRESS_ORG"] = df["ADDRESS_ORG"].apply(lambda z:re.sub(r'\s*([-/])\s*', r'\1', str(z)))

############################ Creating address part markers ############################
df["PLOT"] = df['FLOOR'] = df['BLOCK'] = df['STREET'] = df["POCKET"] = None

############################ Moving pincodes to another column ############################
reg=re.compile(r'(1100\d\d)')
extract_pin(df,"ADDRESS","PINCODE",reg)

############################ Validating Data samples ############################
df.head()

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG,ADDRESS,PLOT,FLOOR,BLOCK,STREET,POCKET,PINCODE
0,Data_input_two_716500,PARAS LIFESTYLES PVT LTD,Company,PARAS GULAB VATIKA PHASE II,P-SEH-17-039,Residential/Group Housing,0.0,Sehore,,,,0.0,9826054143,,30-07-2016,30-09-2018,5200.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,Adjacent to Paras Emperor E-8 Extension Bawadian Kalan Bhopal � 462026,26.0,0.0,,info@paraslifestyles.com,Ongoing,Madhya Pradesh,,,132.0,0.0,,,,Consultants of Project,Sehore,1. Paras Lifestyles Pvt. Ltd 2. Mr. Sudarshan Rai & Mr. Anil Rai,0.0,0.0,0.0,0.0,,,KHASRA NOS 851/1/2/1 851/2 852/2 853/1 873/2/2 877/2 AND 879/2KH TOTALLY ADMEASURING 1 155 HECTARE PARAS LIFESTYLES PVT LTD AND KHASRA NOS 852/1 873/3KH 874/5 876/1 877/1 878/1 AND 879/1 TOTALLY ADMEASURING 1 154 HECTARE 11 540 SUDARSHAN RAI ANIL RAI,KHASRA NOS 851 1 2 1 851 2 852 2 853 1 873 2 2 877 2 AND 879 2 KH TOTALLY ADMEASURING 1 155 HECTARE PARAS LIFESTYLES PVT LTD AND KHASRA NOS 852 1 873 3 KH 874 5 876 1 877 1 878 1 AND 879 1 TOTALLY ADMEASURING 1 154 HECTARE 11 540 SUDARSHAN RAI ANIL RAI,,,,,,
1,Data_input_383000,51 MALL,Partnership Firm,51 MALL 2,P-IND-21-2993,,0.0,Indore,,,,0.0,9826051989,,4/1/2021,4/1/2024,1820.0,0.0,0.0,,0.0,0.0,0.0,180.69,18200000.0,,0.0,123 Palika Plaza Ist Floor MTH Compound Indore,36.0,0.0,,theoutdoorindia@gmail.com,New,Madhya Pradesh,,,38.0,0.0,,,,,Indore,SELF,0.0,0.0,0.0,0.0,,,PLOT NO 5 SCHEME NO 51 AVANTIKA INDORE,PLOT NO 5 SCHEME NO 51 AVANTIKA INDORE,,,,,,
2,Data_input_two_410000,MAA INFRASTRUCTURE,Partnership Firm,RUDRANSH VIHAR,P-OTH-23-3839,,0.0,Badnawar,,,,0.0,9617123456,,15-06-2022,24-05-2025,1682.2,0.0,0.0,,0.0,0.0,0.0,89.92,16822000.0,,0.0,220 VIP paraspar nagar Indore (M.P.)- 452012,35.0,0.0,,maainfrastructure3@gmail.com,New,Madhya Pradesh,,,137.0,0.0,,,,,Dhar,hassan panwala' 'Gendalal mukati' ' kutbuddin hussain,0.0,0.0,0.0,0.0,,,KHASRA NO PART OF 1397/1/1/2 1400 1398/1 GRAM KHEDA TEHSIL BADNAWER DISTRICT DHAR,KHASRA NO PART OF 1397 1 1 2 1400 1398 1 GRAM KHEDA TEHSIL BADNAWER DISTRICT DHAR,,,,,,
3,Data_input_two_428000,M/S JAISHAKTI HOMES,Partnership Firm,JAI SHAKTI HOMES PHASE 4,P-HRD-18-1891,,0.0,Handiya,,,,0.0,9425042452,,22-06-2016,8/8/2019,2000.7,0.0,0.0,,0.0,0.0,0.0,187.07,20007000.0,,0.0,GIRJA SHANKAR NAGAR NEAR BYPASS ROAD HARDAS,38.0,0.0,,matushree@yahoo.com,Ongoing,Madhya Pradesh,,,78.0,0.0,,,,,Harda,Mr. Vivek Kumar Agarwal and Mr. Shobhram Gour,0.0,0.0,0.0,0.0,,,SURVEY NO 164/5 164/6 160/19 160/34 160/35 VILLAGE GRAM HARDA KHAS TEHSIL HARDA DISTRICT HARDA M P,SURVEY NO 164 5 164 6 160 19 160 34 160 35 VILLAGE GRAM HARDA KHAS TEHSIL HARDA DISTRICT HARDA M P,,,,,,
4,Data_input_two_654000,HARISH VIJAYVARGIYA,Individual,HARI VATIKA-02,P-MNS-22-3251,,0.0,Mandsaur,,,,0.0,8085577777,,12/2/2020,12/1/2023,505.4,0.0,0.0,,0.0,0.0,0.0,20.0,5054000.0,,0.0,14 Sudama Nagar Scheme No. 01 Mandsaur M.P. 458001,35.0,0.0,,breez.harish@gmail.com,New,Madhya Pradesh,,,78.0,0.0,,,,,Mandsaur,Mr. Harish S/o Premchandraji Vijayvargiya' ' Mr. Surendra Kumar S/o Hariramji' ' Mr. Radheshyam S/o Hariramji' ' Mrs. Dakha Bai W/o Hariramji,0.0,0.0,0.0,0.0,,,SURVEY NO 231/1 233 AT VILLAGE-TODI TEHSIL AND DIST-MANDSAUR M P,SURVEY NO 231 1 233 AT VILLAGE TODI TEHSIL AND DIST MANDSAUR M P,,,,,,


In [18]:
df.sample(5)

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG,ADDRESS,PLOT,FLOOR,BLOCK,STREET,POCKET,PINCODE
38868,PUNE_One_196500,M/S RIGVED LIFESPACES,Partnership,RIGVED,PUNE_One_196500,Residential/Group Housing,411045.0,Pune,MAHARASHTRA,Pune,,0.0,2022222222,,9/9/2021,30-12-2022,1502.0,0.0,0.0,930.0,0.0,0.0,0.0,0.0,0.0,,0.0,PLOT NO 1 SURVEY NO 103B/1 SHIVAJINAGAR SHIVAJI HOUSING SOCIETY SENAPATI BAPAT ROAD SHIVAJINAGAR SHIVAJINARAG MAHARASHTRA 411016 Pune,0.0,8.0,,,New Project,MAHARASHTRA,BANER,,0.0,0.0,,,,,Pune,,1.0,500.0,2002.0,0.0,P52100018585,,9 MTR WIDE INTERNAL ROAD SURVEY NO 210 PRIVATE PLOT NO 4 OUT OF SURVEY NO 216 PRIVATE PLOT NO 4 OUT OF SURVEY NO 216 MAHARASHTRA BANER BANER 411045,9 MTR WIDE INTERNAL ROAD SURVEY NO 210 PRIVATE PLOT NO 4 OUT OF SURVEY NO 216 PRIVATE PLOT NO 4 OUT OF SURVEY NO 216 MAHARASHTRA BANER BANER 411045,,,,,,
2480,AHMED2S_AMra3s_AUR7S_TH1S_202000,AKSHAY ANIL JAIN,Individual,ORBIT CONSTRUCTION,AHMED2S_AMra3s_AUR7S_TH1S_202000,Residential/Group Housing,444601.0,Amravati,MAHARASHTRA,Amravati,,0.0,9422855585,,9/9/2021,30-03-2022,2283.46,0.0,0.0,815.52,0.0,0.0,0.0,0.0,0.0,,0.0,1 SHRIKRISHNA PETH KOTHARI MANSIONS AMRAVATI AMRAVATI MAHARASHTRA 444601 Amravati,0.0,8.0,,,New Project,MAHARASHTRA,"SURVEY NO. 14/1, PLOT NO. 111, 112, 113",,0.0,0.0,,,,,Amravati,,1.0,0.0,2283.46,1.0,P50300015171,,ROAD ROAD PLOT NO 110 PLOT NO 114 MAHARASHTRA NIMBHORA SURVEY NO 14/1 PLOT NO 111 112 113 444601,ROAD ROAD PLOT NO 110 PLOT NO 114 MAHARASHTRA NIMBHORA SURVEY NO 14 1 PLOT NO 111 112 113 444601,,,,,,
44195,Gandhinagar_9800,PARESH MOHANLAL CHAVDA,,SHRINAND FLORA,PR/GJ/GANDHINAGAR/GANDHINAGAR/AUDA/MAA00594/EX1/051119,Mixed,382330.0,AHMEDABAD,,,,0.0,9879551373,,5/4/2015,30-12-2020,8302.575,0.0,0.0,6131.0,0.0,0.0,0.0,0.0,0.0,,3.0,,0.0,0.0,,,Ongoing,Gujarat,,13.57 - 93.56,155.0,52.0,Ahmedabad Urban Development Authority,11/6/2017,,,,,0.0,0.0,0.0,0.0,,,SHRINAND FLORA REVENUE SURVEY NO 128 TP NO-241 FP NO 91 B/H CNG PUMP GANDHINAGAR GANDHINAGAR GUJARAT 382330,SHRINAND FLORA REVENUE SURVEY NO 128 TP NO 241 FP NO 91 B H CNG PUMP GANDHINAGAR GANDHINAGAR GUJARAT 382330,,,,,,
17949,Mumbai_Suburban_Two_564000,CHANDIWALA ENTERPRISES,Partnership,PEARL HEAVEN IV,Mumbai_Suburban_Two_564000,Residential/Group Housing,400069.0,Konkan,MAHARASHTRA,Konkan,,0.0,2226288813,,7/4/2021,31-12-2026,7406.29,0.0,0.0,1926.42,0.0,0.0,0.0,0.0,0.0,,0.0,"222A, 1ST FLOOR AL-MOONAZ ARCADE S.V.ROAD ANDHERI WEST OPP. ANDHERI POST OFFICE MAHARASHTRA 400058 Mumbai Suburban",0.0,19.0,,,New Project,MAHARASHTRA,KONDIVITA VILLAGE,,0.0,0.0,,,,,Mumbai Suburban,,1.0,5278.86,12685.15,1.0,P51800029797,,CTS 57 58 100 KONDIVITA VILLAGE S V ROAD CTS 60 64A2 CTS 33 34 AND 38 CTS 60 AND 90A MAHARASHTRA ANDHERI EAST KONDIVITA VILLAGE 400069,CTS 57 58 100 KONDIVITA VILLAGE S V ROAD CTS 60 64 A 2 CTS 33 34 AND 38 CTS 60 AND 90 A MAHARASHTRA ANDHERI EAST KONDIVITA VILLAGE 400069,,,,,,
7360,Mumbai_Suburban_One_123500,Black Gold Developers Pvt Ltd,Company,Sheela Smruti CHSL,Mumbai_Suburban_One_123500,Residential/Group Housing,400057.0,Konkan,MAHARASHTRA,Konkan,,0.0,2226842311,,8/9/2017,4/5/2020,821.08,0.0,0.0,477.2,0.0,0.0,0.0,0.0,0.0,,0.0,1 Smruti Building M V Pandloskar Marg Vile Parle (E) Near Shivaji Vidyalaya MAHARASHTRA 400057 Mumbai Suburban,0.0,8.0,,,New Project,MAHARASHTRA,,,0.0,0.0,,,,,Mumbai Suburban,,1.0,467.31,1288.39,1.0,P51800004201,9/9/2021,TEJPAL SCHEME RD NO 3 PLOT BEARING CTS NO 516 PLOT BEARING CTS NO 506 PLOT BEARING CTS NO 508 MAHARASHTRA 400057,TEJPAL SCHEME RD NO 3 PLOT BEARING CTS NO 516 PLOT BEARING CTS NO 506 PLOT BEARING CTS NO 508 MAHARASHTRA 400057,,,,,,


Removing unnecessary info from the address


In [19]:
remov_list(df,'ADDRESS')

ADDRESS processed: 100%|[32m██████████[0m| 15/15 [00:00<00:00, 25.06it/s]


Replacing known words and abbreviation in address from repl_dict


In [20]:
for k in tqdm(repl_dict.keys(),desc="Replacing Words from repl_dict",colour='green'):
    df['ADDRESS'] = df['ADDRESS'].apply(lambda x:x.replace(' ' + k + ' ',' ' + repl_dict[k] + ' '))
df.reset_index(drop=True,inplace=True)

Replacing Words from repl_dict: 100%|[32m██████████[0m| 592/592 [00:27<00:00, 21.58it/s]


In [21]:
df.isna().sum()

unicode                                      0
organisation_name                          141
organisation_type                        12937
project                                     25
project_res_no                              20
project_category                          3511
project_address_pincode                      0
city                                      3789
organisation_state                       14394
tehsil                                   17481
website                                  51292
zipcode                                      0
office_no                                 1965
estimated_commencement_date              49682
actual_commencement_date                  7880
estimated_finish_date                       23
total_built_up_area_saleable_area          143
phase_area                                   0
open_area                                    0
total_area_of_project                     3877
fees_to_be_paid_to_raj_rera                  0
number_of_apa

Segregating address markers


In [22]:
%%time
df["PLOT"]=df["ADDRESS"].apply(lambda z: fetch_PLOT(z))

df['FLOOR']= df['ADDRESS'].apply(lambda x: fetch_FLOOR(x)[0])
df['ADDRESS'] = df['ADDRESS'].apply(lambda x:fetch_FLOOR(x)[1])

df['STREET'] = df['ADDRESS'].apply(lambda x: fetch_STREET(x)[0])
df['ADDRESS'] = df['ADDRESS'].apply(lambda x:fetch_STREET(x)[1])

df['POCKET'] = df['ADDRESS'].apply(lambda x: fetch_POCKET(x)[0])
df['ADDRESS_ORG'] = df['ADDRESS_ORG'].apply(lambda x:fetch_POCKET(x)[1])

df['BLOCK'] = df['ADDRESS'].apply(lambda x: fetch_BLOCK(x)[0])
df['ADDRESS_ORG'] = df['ADDRESS_ORG'].apply(lambda x:fetch_BLOCK(x)[1])

CPU times: user 2.03 s, sys: 22.9 ms, total: 2.06 s
Wall time: 2.06 s


In [23]:
df.isna().sum()

unicode                                      0
organisation_name                          141
organisation_type                        12937
project                                     25
project_res_no                              20
project_category                          3511
project_address_pincode                      0
city                                      3789
organisation_state                       14394
tehsil                                   17481
website                                  51292
zipcode                                      0
office_no                                 1965
estimated_commencement_date              49682
actual_commencement_date                  7880
estimated_finish_date                       23
total_built_up_area_saleable_area          143
phase_area                                   0
open_area                                    0
total_area_of_project                     3877
fees_to_be_paid_to_raj_rera                  0
number_of_apa

Replacing all similar words with the most frequent one


creating a word corpus [df_corpus] and making various columns for it

In [24]:
w = fetch_words(df,'ADDRESS', all_words=True)
df_corpus = pd.DataFrame(zip(w.keys(),w.values()),columns = ['WORD','WORD_COUNT'])
df_corpus['WORD_TYPE'] = df_corpus['WORD'].apply(lambda z:ifnum(z))
df_corpus['WORD_LENGTH'] = df_corpus['WORD'].apply(lambda z:len(z))
df_corpus["WORD"] = df_corpus["WORD"].apply(lambda z:re.sub(r'NULLNULL\b', ' ', str(z)))
newPrint(str(df_corpus.shape))
df_corpus.head()

### <font color=green>(31936, 4)</font> <font color=red><b><em></em></b></font>

Unnamed: 0,WORD,WORD_COUNT,WORD_TYPE,WORD_LENGTH
0,KHASRA,5502,NonNumeric,6
1,NOS,176,NonNumeric,3
2,851,14,Numeric,3
3,1,14773,Numeric,1
4,2,12378,Numeric,1


In [25]:
df.sample(5)

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG,ADDRESS,PLOT,FLOOR,BLOCK,STREET,POCKET,PINCODE
19721,Vadodara_92600,PRAMUKH ASSOCIATES (K.L.PATEL),,"Mangla Marvel Tower B-1, B-2, B-3",PR/GJ/VADODARA/VADODARA/Others/RAA04170/291118,Residential/Group Housing,390010.0,VADODARA,,,,0.0,7575040321,,4/1/2018,30-06-2021,3300.0,0.0,0.0,6780.0,0.0,0.0,0.0,0.0,0.0,,1.0,,0.0,0.0,,,New,Gujarat,,55 - 55,60.0,0.0,Vadodara Municipal Corporation,,,,,,0.0,0.0,0.0,0.0,,,NEAR UMA VIDYALAYA TARSALI VADODARA VADODARA GUJARAT 390010,UMA VIDYALAYA TARSALI VADODARA VADODARA GUJARAT 390010,,,,,,
26612,Pune_Six_415500,SWARAAJ REALITIES,Partnership,Swaraaj Paradise Ph3,Pune_Six_415500,Others,411027.0,Pune,MAHARASHTRA,Pune,,0.0,2027654012,,2/7/2023,31-12-2025,2446.88,0.0,200.0,1900.0,0.0,0.0,0.0,0.0,0.0,,0.0,Plot No 89 SECTOR 27 (JANTA) SHIVAM NIGDI PRADHIKARAN OPP EXCISE BHAVAN MAHARASHTRA 411044 Pune,0.0,6.0,,,New Project,MAHARASHTRA,Kokane Chowk,,0.0,0.0,,,,,Pune,,2.0,5000.8,7447.68,1.0,P52100049387,,SURVEY NO 166 SURVEY NO 123 SURVEY NO 168 SURVEY NO 167 B PART MAHARASHTRA NEAR RELIANCE MALL KOKANE CHOWK 411027,SURVEY NO 166 SURVEY NO 123 SURVEY NO 168 SURVEY NO 167 B PART MAHARASHTRA RELIANCE MALL KOKANE CHOWK 411027,166,,,,,
39714,Ahemdabad_Surat_366800,Jayesh A Dalal,,Construction Of 660 DUs Of EWS-II Type At T.P.28(RUNDH-VESU) F.P.32/P-SUB PLOT NO-2 Under PMAY,PR/GJ/SURAT/SURAT CITY/SUDA/RAA03348/EX2/100221,Residential/Group Housing,395007.0,SURAT,,,,0.0,9724345000,,6/11/2017,30-12-2021,22294.8,0.0,0.0,10470.0,0.0,0.0,0.0,0.0,0.0,,5.0,,0.0,0.0,,,New,Gujarat,,33.78 - 33.78,660.0,0.0,Surat Municipal Corporation,9/4/2018,,,,,0.0,0.0,0.0,0.0,,,T P 28 RUNDH-VESU F P 32/P-SUB PLOT NO-2 SURAT CITY SURAT GUJARAT 395007,T P 28 RUNDH VESU F P 32 P SUB PLOT 2 SURAT CITY SURAT GUJARAT 395007,28,,,,,
47483,Vadodara_112700,"VINUBHAI PATEL, ZARNA ASSOCIATES",,V R ONE,PR/GJ/VADODARA/VADODARA/Others/CAA05246/A1C/200121,Commercial,390019.0,VADODARA,,,,0.0,9825650292,,22-01-2019,30-03-2023,27686.565,0.0,0.0,6687.0,0.0,0.0,0.0,0.0,0.0,,1.0,,0.0,0.0,,,New,Gujarat,,10.74 - 196.65,267.0,117.0,Vadodara Municipal Corporation,,,,,,0.0,0.0,0.0,0.0,,,RS NO 400 1 AND 510 VADODARA VADODARA GUJARAT 390019,RS NO 400 1 AND 510 VADODARA VADODARA GUJARAT 390019,400 1,,,,,
6264,Mumbai_Suburban_One_827500,Rustomjee Realty Private Limited,Company,Rustomjee Elements - Wing SC,Mumbai_Suburban_One_827500,Residential/Group Housing,400053.0,Konkan,MAHARASHTRA,Konkan,,0.0,2266766888,,,31-12-2019,8881.95,0.0,0.0,863.31,0.0,0.0,0.0,0.0,0.0,,0.0,"702, 7th Floor Natraj by Rustomjee MV Road Junction, Western Express Highway Andheri East Vishal Hall MAHARASHTRA 400069 Mumbai Suburban",0.0,13.0,,,On-Going Project,MAHARASHTRA,D. N. Nagar,,0.0,0.0,,,30-06-2021,,Mumbai Suburban,,1.0,291.97,9173.92,0.0,,,LATITUDE 19D07M06PT941S LONGITUDE 72D49S46PT778S LATITUDE 19D07M07PT733S LONGITUDE 72D49M45PT472S LATITUDE 19D07M08PT155S LONGITUDE 72D49M46PT751S LATITUDE 19D07M06PT781S LONGITUDE 72D49M46PT327S MAHARASHTRA D N NAGAR D N NAGAR 400053,LATITUDE 19 D 07 M 06 PT 941 S LONGITUDE 72 D 49 S 46 PT 778 S LATITUDE 19 D 07 M 07 PT 733 S LONGITUDE 72 D 49 M 45 PT 472 S LATITUDE 19 D 07 M 08 PT 155 S LONGITUDE 72 D 49 M 46 PT 751 S LATITUDE 19 D 07 M 06 PT 781 S LONGITUDE 72 D 49 M 46 PT 327 S MAHARASHTRA D N NAGAR D N NAGAR 400053,19,,,,,


Making a word only Dataframe [wordonly_corpus]


In [26]:
thresh_list=word_size_list=word_list =  []
############################ Customizing wordonly dataframe ############################
# wordonly_corpus = df_corpus[(df_corpus['WORD_TYPE']=='NonNumeric')&(df_corpus['WORD_LENGTH']>=3)][["WORD","WORD_LENGTH",'WORD_COUNT']]
wordonly_corpus = df_corpus[(df_corpus['WORD_TYPE']=='NonNumeric')&(df_corpus['WORD_LENGTH']>=3)][["WORD","WORD_LENGTH",'WORD_COUNT']].sort_values(by='WORD_LENGTH',ascending=0)
wordonly_corpus["FZ_THRESH"] = df_corpus["WORD_LENGTH"].apply(lambda z: int(100-100/(z/2)))  #Adding the calculated fuzzy threshold values for each word to a column
wordonly_corpus["NO. OF MATCHES"] = 0
wordonly_corpus["BEST_MATCHES"] = ""
############################ exporting column to a various different lists ############################
thresh_list = wordonly_corpus["FZ_THRESH"].tolist()
word_count_dict = wordonly_corpus["WORD_COUNT"].tolist()
word_list = wordonly_corpus["WORD"].tolist()
word_size_list =  wordonly_corpus["WORD_LENGTH"].tolist()
word_list = [str.strip(i) for i in word_list]
# word_count_dict = [list(item) for item in zip(word_list , word_count_dict)]
word_count_dict = dict(zip(word_list,word_count_dict))
############################ Resetting and losing weights from dataframe ############################
wordonly_corpus.drop(["WORD_LENGTH","FZ_THRESH",'WORD_COUNT'],axis=1,inplace=True)
wordonly_corpus.reset_index(drop=True, inplace=True)
############################ Removing NULL from end of words ############################
wordonly_corpus["WORD"] = wordonly_corpus["WORD"].apply(lambda z:re.sub(r'NULLNULL\b', ' ', str(z)))

wordonly_corpus.head()

Unnamed: 0,WORD,NO. OF MATCHES,BEST_MATCHES
0,ATVILLAGEJUGALPURATEHSILRAJGARHDISTRICTRAJGARH,0,
1,ROADSAITIRUPATIGREENSPHASEIII,0,
2,NAVJAVANSAHAKARIGRUHARACHANA,0,
3,SAHAKARIGRUHARACHANASOCIETY,0,
4,ANDPARTPORTIONOFPOTNO,0,


Finding similar words in corpus through fuzzy and adding to the word only dataframe


In [27]:
rowindex = 0

for i in tqdm(range(len(word_size_list)), desc="Processing words", colour="green"):
    best_matches = []
    no_of_matches = 0

    for j in range(len(word_size_list)):
        if word_list[j] == word_list[i] or (word_size_list[j] > word_size_list[i]+2):
            continue
        elif word_size_list[j] < (word_size_list[i]-2):
            break
        else:
            score = fuzz.ratio(word_list[i], word_list[j])
            if score >= max(83, thresh_list[i]):
                best_matches.append(word_list[j])
                no_of_matches += 1
            else:
                continue

    if len(best_matches) > 0:
        wordonly_corpus.loc[i, "BEST_MATCHES"] = " ".join(best_matches)
        wordonly_corpus.loc[i, "NO. OF MATCHES"] = no_of_matches
    else:
        continue

    rowindex += 1  # This incrementation should probably be placed within the loop to be accurate

Processing words: 100%|[32m██████████[0m| 24550/24550 [2:42:25<00:00,  2.52it/s]


In [28]:
wordonly_corpus.head(15)


Unnamed: 0,WORD,NO. OF MATCHES,BEST_MATCHES
0,ATVILLAGEJUGALPURATEHSILRAJGARHDISTRICTRAJGARH,0,
1,ROADSAITIRUPATIGREENSPHASEIII,0,
2,NAVJAVANSAHAKARIGRUHARACHANA,0,
3,SAHAKARIGRUHARACHANASOCIETY,0,
4,ANDPARTPORTIONOFPOTNO,0,
5,JOSHIGURADHIYATAHSIL,0,
6,PROPERTYOFMRDESHMUKH,0,
7,VIJAYSITARAMGUMATKAR,0,
8,MAHESHWARNIKETANCHS,0,
9,IRRIGATIONBABYCANAL,0,


In [29]:
df.sample(5)

Unnamed: 0,unicode,organisation_name,organisation_type,project,project_res_no,project_category,project_address_pincode,city,organisation_state,tehsil,website,zipcode,office_no,estimated_commencement_date,actual_commencement_date,estimated_finish_date,total_built_up_area_saleable_area,phase_area,open_area,total_area_of_project,fees_to_be_paid_to_raj_rera,number_of_apartments_plots,sanctioned_number_of_apartments_plots,land_cost_as_per_rule_5_1,development_cost_as_per_rule_5_2,building_name,block_no,full_address,month_difference_start_and_finish,floor,temp_rera,email,project_status,project_state,project_address_street,project_carpet_area,residential_projects_total_units,residential_projects_available_units,plan_passing_authority,plan_passing_authority_approved_date,Extended End Date,Project development work,Project District,land_ownership,number_of_sanctioned_building,built_up_area_as_per_proposed_fsi,permissible_built_up_area,number_of_basements,maharastra_rera_no,form_filling_project_date,ADDRESS_ORG,ADDRESS,PLOT,FLOOR,BLOCK,STREET,POCKET,PINCODE
44829,Ahemdabad_Surat_9900,ACHAL PARIKH CONSULTING ENGINEERS,,AAVKAR HEIGHT,PR/GJ/AHMEDABAD/AHMEDABAD CITY/AUDA/MAA03278/300718,Mixed,382421.0,GANDHI NAGAR,,,,0.0,7016613325,,6/7/2013,15-09-2018,4039.92,0.0,0.0,2551.0,0.0,0.0,0.0,0.0,0.0,,2.0,,0.0,0.0,,,Ongoing,Gujarat,,12.55 - 52.61,124.0,0.0,Ahmedabad Urban Development Authority,30-07-2018,,,,,0.0,0.0,0.0,0.0,,,AAVKAR HEIGHT B/H SATYAMEV HOSPITAL AHMEDABAD CITY AHMEDABAD GUJARAT 382421,AAVKAR HEIGHT B H SATYAMEV HOSPITAL AHMEDABAD CITY AHMEDABAD GUJARAT 382421,,,,,,
45577,NASIK11S_AHMED1S_735000,Kothari Housing Company,Partnership,Aarambh Elite,NASIK11S_AHMED1S_735000,Others,422222.0,Nashik,MAHARASHTRA,Nashik,,0.0,9145010203,,20/03/2023,30-06-2027,3192.68,0.0,0.0,1740.4,0.0,0.0,0.0,0.0,0.0,,0.0,Aarambh Gangapur road Apartment Gangapur gaon near SBI Gangapur Gaon Branch MAHARASHTRA 422222 Nashik,0.0,13.0,,,New Project,MAHARASHTRA,Gangapur road,,0.0,0.0,,,,,Nashik,,1.0,0.0,3192.68,1.0,P51600046518,,BY 30 MTRS WIDE DP ROAD BY PLOT NO 5-6 BY PLOT NO 4 BY 15 MTRSWIDE ROAD MAHARASHTRA GANGAPUR ROAD GANGAPUR ROAD 422222,BY 30 MTRS WIDE DP ROAD BY PLOT 5 6 BY PLOT 4 BY 15 MTRSWIDE ROAD MAHARASHTRA GANGAPUR ROAD GANGAPUR ROAD 422222,30.0,,,,,
12035,RAIGARH_Two_325000,NAVKAR CREATIONS,Partnership,TANISHKA ROYALE,RAIGARH_Two_325000,Others,410203.0,Konkan,MAHARASHTRA,Konkan,,0.0,9370261567,,9/8/2021,10/7/2023,849.93,0.0,624.31,624.31,0.0,0.0,0.0,0.0,0.0,,0.0,SHOP NO 06 AYESHA COMPLEX SHILPHATA KHOPOLI KHOPOLI MAHARASHTRA 410203 Raigarh,0.0,4.0,,,New Project,MAHARASHTRA,KHOPOLI,,0.0,0.0,,,,,Raigarh,,1.0,0.0,849.93,1.0,P52000027776,,ROAD ROAD RAJENDRA LODGE ROAD MAHARASHTRA KHOPOLI KHOPOLI 410203,ROAD ROAD RAJENDRA LODGE ROAD MAHARASHTRA KHOPOLI KHOPOLI 410203,,,,,,
3881,RAIGARH_Two_854500,Shreeram Builders & Developers,Partnership,Aarambh Residency C1 D1,RAIGARH_Two_854500,Residential/Group Housing,410101.0,Konkan,MAHARASHTRA,Konkan,,0.0,2227796161,,9/8/2021,5/7/2019,4405.0,0.0,400.0,3863.33,0.0,0.0,0.0,0.0,0.0,,0.0,T-230 Yashodham Sector-4 Airoli Durga mata mandir MAHARASHTRA 400708 Thane,0.0,9.0,,,On-Going Project,MAHARASHTRA,,,0.0,0.0,,,5/7/2019,,Raigarh,,2.0,0.0,4405.0,1.0,P50500024669,,SURVEY NUMBER 10 VILLAGE ROAD SURVEY NUMBER 39 VILLAGE GAOTHAN AND SCHOOL MAHARASHTRA 410101,SURVEY NUMBER 10 VILLAGE ROAD SURVEY NUMBER 39 VILLAGE GAOTHAN AND SCHOOL MAHARASHTRA 410101,10.0,,,,,
30315,Vadodara_4900,AASUTOSH A DESAI,,Akshar Green,PR/GJ/VADODARA/VADODARA/Others/RAA03309/010818,Residential/Group Housing,390012.0,VADODARA,,,,0.0,9825117545,,30-11-2011,31-10-2019,1471.3,0.0,0.0,20922.88,0.0,0.0,0.0,0.0,0.0,,1.0,,0.0,0.0,,,Ongoing,Gujarat,,47.22 - 99.91,20.0,6.0,Vadodara Municipal Corporation,,,,,,0.0,0.0,0.0,0.0,,,AKSHAR GREEN VADODARA VADODARA GUJARAT 390012,AKSHAR GREEN VADODARA VADODARA GUJARAT 390012,,,,,,
