In [1]:
import pandas as pd
from pandas_ods_reader import read_ods

# Data Loading

In [2]:
base_path = r"./host_uniq/old_prd_vm.ods"
sheet = 'Sheet1'

df = read_ods(base_path, sheet, headers=True)

In [3]:
df = df[['Name, instance-id, uuid', 'Host']]

In [4]:
df = df.set_axis(["instance", "host"], axis=1)

In [5]:
df_no_host = df[df["host"] == 'null']

In [6]:
df_no_host.count()

instance    14
host        14
dtype: int64

In [7]:
df2 = df[df["host"] != 'null']

In [10]:
df2.count()

instance    524
host        524
dtype: int64

In [11]:
df2.head()

Unnamed: 0,instance,host
0,ccsblackduckp01,krtmsprdcomd15.krtms
1,ccsglbkmsp01,krtmsprdcomc05.krtms
2,ccsglbkmsp02,krtmsprdcomd23.krtms
3,ccsglbvpkihqdbp01,krtmsprdcomh23.krtms
4,ccsglbvpkihqdbp02,krtmsprdcomc10.krtms


In [12]:
df2['host-instance'] = df2.apply(lambda row: row.host + "." + row.instance, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['host-instance'] = df2.apply(lambda row: row.host + "." + row.instance, axis=1)


In [13]:
df2.head()

Unnamed: 0,instance,host,host-instance
0,ccsblackduckp01,krtmsprdcomd15.krtms,krtmsprdcomd15.krtms.ccsblackduckp01
1,ccsglbkmsp01,krtmsprdcomc05.krtms,krtmsprdcomc05.krtms.ccsglbkmsp01
2,ccsglbkmsp02,krtmsprdcomd23.krtms,krtmsprdcomd23.krtms.ccsglbkmsp02
3,ccsglbvpkihqdbp01,krtmsprdcomh23.krtms,krtmsprdcomh23.krtms.ccsglbvpkihqdbp01
4,ccsglbvpkihqdbp02,krtmsprdcomc10.krtms,krtmsprdcomc10.krtms.ccsglbvpkihqdbp02


In [14]:
df2['host-instance-trim'] = df2.apply(lambda row: row.host + "." + row.instance[:-2], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['host-instance-trim'] = df2.apply(lambda row: row.host + "." + row.instance[:-2], axis=1)


In [15]:
df2.head()

Unnamed: 0,instance,host,host-instance,host-instance-trim
0,ccsblackduckp01,krtmsprdcomd15.krtms,krtmsprdcomd15.krtms.ccsblackduckp01,krtmsprdcomd15.krtms.ccsblackduckp
1,ccsglbkmsp01,krtmsprdcomc05.krtms,krtmsprdcomc05.krtms.ccsglbkmsp01,krtmsprdcomc05.krtms.ccsglbkmsp
2,ccsglbkmsp02,krtmsprdcomd23.krtms,krtmsprdcomd23.krtms.ccsglbkmsp02,krtmsprdcomd23.krtms.ccsglbkmsp
3,ccsglbvpkihqdbp01,krtmsprdcomh23.krtms,krtmsprdcomh23.krtms.ccsglbvpkihqdbp01,krtmsprdcomh23.krtms.ccsglbvpkihqdbp
4,ccsglbvpkihqdbp02,krtmsprdcomc10.krtms,krtmsprdcomc10.krtms.ccsglbvpkihqdbp02,krtmsprdcomc10.krtms.ccsglbvpkihqdbp


In [16]:
df2['host-instance-trim'].describe()

count                                        524
unique                                       491
top       krtmsprdcomd20.krtms.ccskrlmslogstashp
freq                                           4
Name: host-instance-trim, dtype: object

In [17]:
df2[df2['host-instance-trim'].duplicated()].count()

instance              33
host                  33
host-instance         33
host-instance-trim    33
dtype: int64

In [18]:
df3 = df2[df2['host-instance-trim'].duplicated()][['host-instance-trim', 'host-instance']]

In [19]:
df3.count()

host-instance-trim    33
host-instance         33
dtype: int64

In [20]:
df3

Unnamed: 0,host-instance-trim,host-instance
56,krtmsprdcomq03.krtms.ccskrccspapigwp,krtmsprdcomq03.krtms.ccskrccspapigwp03
67,krtmsprdcomd21.krtms.ccskrccspdsfludentp,krtmsprdcomd21.krtms.ccskrccspdsfludentp02
85,krtmsprdcomh24.krtms.ccskrccspgtcwasp,krtmsprdcomh24.krtms.ccskrccspgtcwasp05
95,krtmsprdcomh07.krtms.ccskrccspkafkap,krtmsprdcomh07.krtms.ccskrccspkafkap04
96,krtmsprdcomh05.krtms.ccskrccspkafkap,krtmsprdcomh05.krtms.ccskrccspkafkap05
112,krtmsprdcomc05.krtms.ccskrccspredisp,krtmsprdcomc05.krtms.ccskrccspredisp03
141,krtmsprdcomd21.krtms.ccskrccspzoop,krtmsprdcomd21.krtms.ccskrccspzoop02
151,krtmsprdcomc11.krtms.ccskrdynatracemanagerp,krtmsprdcomc11.krtms.ccskrdynatracemanagerp03
180,krtmsprdcomq01.krtms.ccskrextissbsp,krtmsprdcomq01.krtms.ccskrextissbsp03
181,krtmsprdcomq02.krtms.ccskrextissbsp,krtmsprdcomq02.krtms.ccskrextissbsp04


In [21]:
df3.count()

host-instance-trim    33
host-instance         33
dtype: int64

In [22]:
list = df2['host-instance'].tolist()

In [23]:
searchkey = df3['host-instance-trim'].tolist()

In [24]:
collection = []

for item in list:
    if any(key in item for key in searchkey):
        collection.append(item)

In [25]:
len(collection)

62

In [26]:
df_result = pd.DataFrame(collection)

In [27]:
df_result.to_csv('oldprd_2-or-more-instances-per-host.csv')

In [28]:
df_result.count()

0    62
dtype: int64

In [29]:
df_result

Unnamed: 0,0
0,krtmsprdcomq03.krtms.ccskrccspapigwp01
1,krtmsprdcomq03.krtms.ccskrccspapigwp03
2,krtmsprdcomd21.krtms.ccskrccspdsfludentp01
3,krtmsprdcomd21.krtms.ccskrccspdsfludentp02
4,krtmsprdcomh24.krtms.ccskrccspgtcwasp02
...,...
57,krtmsprdcomc09.krtms.ccskrsvchubcorewp04
58,krtmsprdcomh11.krtms.ccskrsvchubigwmp01
59,krtmsprdcomh11.krtms.ccskrsvchubigwmp03
60,krtmsprdcomh03.krtms.ccskrvrsbsp03
