In [1]:
def process_df(path):
    """Load a BioGRID MITAB table and keep human, non-self interactions.

    Parameters
    ----------
    path : str
        Path of a tab-separated file that contains at least the columns
        ``#ID Interactor A``, ``ID Interactor B``, ``Taxid Interactor A``
        and ``Taxid Interactor B``.

    Returns
    -------
    pandas.DataFrame
        The rows whose two taxonomy columns both equal ``taxid:9606`` and
        whose two interactor IDs differ. The first column is renamed to
        ``ID Interactor A`` and the ``entrez gene/locuslink:`` prefix is
        removed from both ID columns.

    Notes
    -----
    Relies on the notebook-level ``import pandas as pd``.
    """
    prefix = 'entrez gene/locuslink:'

    def _strip_prefix(value):
        # str.lstrip(prefix) would treat the argument as a *set* of characters
        # and keep eating into identifiers that merely start with some of
        # those characters; remove the literal prefix only.
        if value.startswith(prefix):
            return value[len(prefix):]
        return value

    # Read Biogrid data into pandas dataframe
    df = pd.read_table(path, engine="c")

    # Drop the leading '#' of the header and clean both ID columns
    df = df.rename(columns={'#ID Interactor A': 'ID Interactor A'})
    for column in ('ID Interactor A', 'ID Interactor B'):
        df[column] = df[column].map(_strip_prefix)

    # Remove non-human ppi from the dataframe
    df = df[df['Taxid Interactor A'].isin(['taxid:9606'])]
    df = df[df['Taxid Interactor B'].isin(['taxid:9606'])]

    # Remove self interactions
    df = df[df['ID Interactor A'] != df['ID Interactor B']]

    return df

In [2]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import numpy as np
import pandas as pd
import networkx as nx
import copy
import random
from networkx.readwrite import json_graph
import json
import itertools

In [3]:
# Name columns of the scored pairs (later exported as the two Entrez ID columns).
names = np.load('../arrays/prediction/predictnames_inter55m.npy')

# Predicted labels, stored as strings so they can share one array with the names.
# Stand-in for dry runs (random labels, roughly 0.2% positives):
#labels = np.expand_dims(np.random.choice([0, 1], size=(len(names),), p=[0.998, 0.002]).astype(str),axis=1)
labels = np.load('../PREDICTIONS_BIN.npy')
labels = labels.astype(str)

# Put the label column(s) to the right of the name columns.
nam_lab = np.concatenate([names, labels], axis=1)

In [4]:
# Table of scored pairs: the name columns come first, the string label last
# (positional integer column names, as produced by pd.DataFrame on an array).
df = pd.DataFrame(data=nam_lab)

# Name arrays used further down; `newnames` drives the highlighting of nodes.
newnames = np.load('../arrays/prediction/newnames3.npy')
allnames = np.load('../arrays/prediction/allnames19.npy')

In [5]:
# Known human interactions from BioGRID, reduced to the two ID columns and
# relabelled 0/1 so they line up with the first two columns of `df`.
existing_df = process_df("../../BIOGRID-ORGANISM-Homo_sapiens-3.4.147.mitab.txt")
existing_df = existing_df[['ID Interactor A', 'ID Interactor B']].copy()
existing_df.columns = [0, 1]

# Stack the known pairs on top of the pairs predicted as interacting (label '1').
full_df = pd.concat([existing_df, df[df[2] == '1']], axis=0)

In [6]:
# networkx 2.0 introduced from_pandas_edgelist and later releases removed
# from_pandas_dataframe; use whichever builder this installation provides.
try:
    _graph_from_edges = nx.from_pandas_edgelist
except AttributeError:
    _graph_from_edges = nx.from_pandas_dataframe

# G    : BioGRID interactions plus the pairs predicted as interacting.
# newG : only the pairs predicted as interacting (label '1').
# Columns 0 and 1 hold the two endpoints of every edge.
G = _graph_from_edges(full_df, 0, 1, create_using=nx.Graph())
newG = _graph_from_edges(df[df[2] == '1'], 0, 1, create_using=nx.Graph())

In [63]:
# Rank every node of the predicted-only graph by degree, highest first, and
# print "rank node degree" (one line per node).
# The degree table is built once: calling newG.degree() inside the loop would
# recompute it for every printed node. dict(...) accepts both a plain
# node->degree dict and an iterable of (node, degree) pairs, so this works
# regardless of which of the two newG.degree() returns.
degrees = dict(newG.degree())
degreelist = list(degrees)
sortdl = sorted(degreelist, key=degrees.get, reverse=True)
for c, i in enumerate(sortdl):
    print(c, i, degrees[i])

0 89832 3943
1 101929970 3415
2 1135 3413
3 1140 3413
4 1141 3413
5 3359 3405
6 57053 3399
7 8973 3396
8 9177 3271
9 6564 2817
10 6423 2659
11 100289279 2577
12 6709 2513
13 105369239 2377
14 2570 2113
15 441024 2069
16 6708 1958
17 11064 1772
18 55604 1734
19 3912 1727
20 2833 1704
21 10095 1499
22 6334 1488
23 347 1484
24 5979 1479
25 7294 1475
26 10351 1417
27 26154 1417
28 79007 1372
29 8406 1351
30 246100 1280
31 63950 1278
32 80164 1278
33 6206 1257
34 4677 1218
35 10840 1211
36 477 1206
37 79875 1202
38 3973 1196
39 100533105 1157
40 4058 1134
41 4145 1125
42 7253 1119
43 23620 1118
44 55366 1118
45 118461 1109
46 389333 1109
47 2565 1106
48 2170 1105
49 640 1100
50 10394 1092
51 9037 1088
52 388698 1086
53 10350 1081
54 5549 1064
55 5859 1061
56 2110 1058
57 101927446 1047
58 5108 1020
59 64422 1016
60 657 1012
61 51742 942
62 369 939
63 8992 912
64 55729 903
65 60495 899
66 100130827 898
67 102800317 898
68 646643 898
69 85320 888
70 2534 886
71 223117 884
72 139728 883
73 285

752 30001 167
753 55330 167
754 8705 167
755 1809 167
756 84033 167
757 283446 167
758 3808 167
759 3809 167
760 553128 167
761 57292 167
762 79612 166
763 25 166
764 100528017 166
765 102723553 166
766 3084 165
767 2185 165
768 125704 165
769 115548 164
770 2034 164
771 4543 164
772 55630 164
773 124923 164
774 56999 164
775 654463 164
776 80070 164
777 59352 163
778 1386 163
779 3932 163
780 100885848 163
781 147183 163
782 1581 163
783 164633 163
784 3425 163
785 342574 163
786 83698 163
787 128710 162
788 79657 162
789 117 162
790 51167 162
791 90668 161
792 3565 161
793 196463 161
794 6652 161
795 157855 161
796 5918 161
797 642597 161
798 23397 160
799 64983 160
800 27347 159
801 2140 159
802 8997 159
803 2994 159
804 57699 159
805 9365 159
806 3363 158
807 7343 158
808 9751 158
809 57670 156
810 114789 156
811 100506012 156
812 7483 156
813 9632 155
814 775 155
815 5932 155
816 8891 155
817 197135 155
818 257236 155
819 27094 155
820 729 154
821 158234 154
822 5598 154
823 57685

1570 164118 73
1571 22829 73
1572 2830 73
1573 283383 73
1574 375189 73
1575 56979 73
1576 611 73
1577 643923 73
1578 93655 73
1579 51234 72
1580 55103 72
1581 9002 72
1582 5605 72
1583 64151 72
1584 54949 72
1585 4927 72
1586 10561 72
1587 347404 72
1588 644100 72
1589 728597 72
1590 339855 71
1591 23331 71
1592 38 71
1593 11051 71
1594 23022 71
1595 30836 71
1596 483 71
1597 100507424 71
1598 123096 71
1599 139422 71
1600 149018 71
1601 27189 71
1602 283130 71
1603 345193 71
1604 53342 71
1605 54877 71
1606 574414 71
1607 645864 71
1608 112936 70
1609 22981 70
1610 51617 70
1611 55915 70
1612 8034 70
1613 6189 70
1614 10056 70
1615 1808 70
1616 3157 70
1617 3735 70
1618 102724334 70
1619 1084 70
1620 1089 70
1621 114827 70
1622 146802 70
1623 158983 70
1624 199699 70
1625 54209 70
1626 56971 70
1627 64208 70
1628 79746 70
1629 80341 70
1630 9625 69
1631 11155 69
1632 387082 69
1633 9360 69
1634 1073 69
1635 5905 69
1636 3098 69
1637 57470 69
1638 5792 69
1639 11095 69
1640 11096 69
1

2277 23225 47
2278 51699 47
2279 6259 47
2280 7112 47
2281 10999 47
2282 1544 47
2283 283455 47
2284 54468 47
2285 9349 47
2286 9551 47
2287 11182 47
2288 114134 47
2289 124857 47
2290 128272 47
2291 155184 47
2292 53345 47
2293 66035 47
2294 79981 47
2295 81031 47
2296 84891 47
2297 100533496 46
2298 554235 46
2299 23363 46
2300 337867 46
2301 51517 46
2302 5538 46
2303 150365 46
2304 23753 46
2305 26287 46
2306 51606 46
2307 80221 46
2308 8175 46
2309 105373378 46
2310 10838 46
2311 109504726 46
2312 116412 46
2313 124961 46
2314 126069 46
2315 127665 46
2316 128611 46
2317 136259 46
2318 148979 46
2319 160365 46
2320 163115 46
2321 169834 46
2322 195828 46
2323 220929 46
2324 22834 46
2325 257101 46
2326 25850 46
2327 26239 46
2328 2738 46
2329 284346 46
2330 285349 46
2331 347736 46
2332 353131 46
2333 353133 46
2334 353134 46
2335 353135 46
2336 353137 46
2337 353140 46
2338 353143 46
2339 374655 46
2340 54753 46
2341 55279 46
2342 55657 46
2343 57567 46
2344 729288 46
2345 7594 4

3073 8995 37
3074 2006 36
3075 758 36
3076 11333 36
3077 118924 36
3078 123688 36
3079 25844 36
3080 26168 36
3081 60314 36
3082 7020 36
3083 79054 36
3084 79892 36
3085 8707 36
3086 93587 36
3087 10667 36
3088 374454 36
3089 4261 36
3090 54822 36
3091 58472 36
3092 64220 36
3093 6730 36
3094 84709 36
3095 92749 36
3096 63892 36
3097 6608 36
3098 221178 36
3099 6611 36
3100 91662 36
3101 1576 36
3102 10813 36
3103 83787 36
3104 123720 36
3105 135935 36
3106 2326 36
3107 2330 36
3108 285588 36
3109 344022 36
3110 374286 36
3111 50613 36
3112 645832 36
3113 6490 36
3114 84698 36
3115 84804 36
3116 85474 36
3117 91860 36
3118 132884 35
3119 11047 35
3120 11331 35
3121 130 35
3122 23595 35
3123 51068 35
3124 7268 35
3125 9772 35
3126 9244 35
3127 400916 35
3128 4773 35
3129 9919 35
3130 7763 35
3131 1058 35
3132 1195 35
3133 1209 35
3134 200728 35
3135 22878 35
3136 285331 35
3137 3765 35
3138 4155 35
3139 64928 35
3140 89853 35
3141 90507 35
3142 6182 35
3143 795 35
3144 84894 35
3145 117

3811 55505 25
3812 55839 25
3813 57479 25
3814 6119 25
3815 6598 25
3816 6863 25
3817 6993 25
3818 79983 25
3819 813 25
3820 943 25
3821 1743 25
3822 494551 25
3823 4688 25
3824 5590 25
3825 10248 25
3826 10526 25
3827 10799 25
3828 11004 25
3829 158135 25
3830 160298 25
3831 1736 25
3832 1815 25
3833 1832 25
3834 192111 25
3835 26507 25
3836 2810 25
3837 283149 25
3838 4626 25
3839 490 25
3840 51816 25
3841 54539 25
3842 55125 25
3843 55759 25
3844 5594 25
3845 6231 25
3846 740 25
3847 79694 25
3848 80169 25
3849 8345 25
3850 9183 25
3851 9655 25
3852 991 25
3853 506 25
3854 145773 25
3855 6349 25
3856 11026 25
3857 10211 25
3858 5704 25
3859 79148 25
3860 389434 25
3861 192683 25
3862 1953 25
3863 4076 25
3864 57587 25
3865 64581 25
3866 65268 25
3867 8074 25
3868 975 25
3869 150350 25
3870 151242 25
3871 2327 25
3872 2328 25
3873 2329 25
3874 266977 25
3875 27255 25
3876 283284 25
3877 283316 25
3878 283659 25
3879 374407 25
3880 497190 25
3881 5067 25
3882 53942 25
3883 55057 25
38

4580 100529144 18
4581 26165 18
4582 27089 18
4583 4625 18
4584 899 18
4585 1101 18
4586 84545 18
4587 101928718 18
4588 121457 18
4589 26958 18
4590 4863 18
4591 51179 18
4592 575 18
4593 6390 18
4594 64066 18
4595 6901 18
4596 79442 18
4597 8913 18
4598 91 18
4599 916 18
4600 9817 18
4601 312 18
4602 2588 18
4603 645121 18
4604 128434 18
4605 6405 18
4606 139818 18
4607 306 18
4608 26156 18
4609 123745 18
4610 140687 18
4611 3101 18
4612 150763 18
4613 200312 18
4614 22838 18
4615 342618 18
4616 374819 18
4617 401934 18
4618 474170 18
4619 51301 18
4620 646862 18
4621 827 18
4622 84900 18
4623 8747 18
4624 9884 18
4625 3305 17
4626 7779 17
4627 100129969 17
4628 10713 17
4629 11129 17
4630 129642 17
4631 29895 17
4632 3918 17
4633 5139 17
4634 81576 17
4635 84259 17
4636 9727 17
4637 9796 17
4638 100132916 17
4639 100133220 17
4640 10451 17
4641 10557 17
4642 10818 17
4643 11034 17
4644 11142 17
4645 115704 17
4646 147746 17
4647 1514 17
4648 1796 17
4649 1833 17
4650 1917 17
4651 23

5419 1948 13
5420 1975 13
5421 199953 13
5422 219990 13
5423 23187 13
5424 23498 13
5425 25809 13
5426 29079 13
5427 29095 13
5428 3275 13
5429 3604 13
5430 388389 13
5431 401466 13
5432 4898 13
5433 4982 13
5434 5000 13
5435 54431 13
5436 55425 13
5437 5694 13
5438 56983 13
5439 5707 13
5440 57153 13
5441 6198 13
5442 6391 13
5443 64746 13
5444 6883 13
5445 6910 13
5446 79003 13
5447 79574 13
5448 8635 13
5449 8991 13
5450 9093 13
5451 92840 13
5452 93594 13
5453 127002 13
5454 4035 13
5455 64344 13
5456 105371242 13
5457 107984071 13
5458 50855 13
5459 11280 13
5460 113278 13
5461 441531 13
5462 8832 13
5463 120103 13
5464 13 13
5465 131096 13
5466 132014 13
5467 134526 13
5468 4868 13
5469 140679 13
5470 146167 13
5471 147719 13
5472 151258 13
5473 153201 13
5474 153218 13
5475 6453 13
5476 2249 13
5477 2256 13
5478 2259 13
5479 23415 13
5480 23416 13
5481 257 13
5482 25817 13
5483 26281 13
5484 27006 13
5485 27290 13
5486 284355 13
5487 284467 13
5488 285641 13
5489 2864 13
5490 28

6113 8079 11
6114 11240 11
6115 285381 11
6116 22995 11
6117 51706 11
6118 864 11
6119 120376 11
6120 127845 11
6121 136332 11
6122 84665 11
6123 149297 11
6124 149465 11
6125 200504 11
6126 8566 11
6127 1048 11
6128 90273 11
6129 29789 11
6130 222865 11
6131 23114 11
6132 10461 11
6133 254272 11
6134 285220 11
6135 26083 11
6136 2996 11
6137 3118 11
6138 345456 11
6139 347741 11
6140 353238 11
6141 389827 11
6142 414060 11
6143 414189 11
6144 440307 11
6145 442117 11
6146 53336 11
6147 57818 11
6148 64409 11
6149 647219 11
6150 6991 11
6151 729877 11
6152 83887 11
6153 83889 11
6154 92579 11
6155 440097 10
6156 84504 10
6157 55187 10
6158 10279 10
6159 10634 10
6160 144577 10
6161 284161 10
6162 3587 10
6163 6788 10
6164 79153 10
6165 84288 10
6166 64318 10
6167 100288797 10
6168 10159 10
6169 1109 10
6170 116437 10
6171 118429 10
6172 118471 10
6173 1234 10
6174 126075 10
6175 140710 10
6176 154754 10
6177 155060 10
6178 167555 10
6179 1762 10
6180 2081 10
6181 2145 10
6182 221322 10

6745 8634 9
6746 8759 9
6747 8788 9
6748 8799 9
6749 8846 9
6750 8942 9
6751 89801 9
6752 89876 9
6753 90353 9
6754 90523 9
6755 9055 9
6756 90586 9
6757 91137 9
6758 9144 9
6759 9145 9
6760 91947 9
6761 92960 9
6762 9647 9
6763 9692 9
6764 9724 9
6765 9936 9
6766 255626 9
6767 347733 9
6768 3423 9
6769 51000 9
6770 7511 9
6771 3563 9
6772 439996 9
6773 6130 9
6774 7265 9
6775 7442 9
6776 7852 9
6777 79699 9
6778 309 9
6779 337880 9
6780 54093 9
6781 7531 9
6782 10404 9
6783 104909134 9
6784 11054 9
6785 6443 9
6786 1510 9
6787 222255 9
6788 285025 9
6789 2889 9
6790 4535 9
6791 54997 9
6792 7276 9
6793 808 9
6794 9528 9
6795 105369243 9
6796 105376430 9
6797 5686 9
6798 83891 9
6799 51411 9
6800 25895 9
6801 113230 9
6802 273 9
6803 5224 9
6804 6884 9
6805 57687 9
6806 128866 9
6807 2213 9
6808 57463 9
6809 126129 9
6810 861 9
6811 140893 9
6812 1200 9
6813 910 9
6814 165829 9
6815 161582 9
6816 3321 9
6817 219621 9
6818 219995 9
6819 222537 9
6820 23302 9
6821 245802 9
6822 5055 9
68

7439 8224 6
7440 91526 6
7441 9311 6
7442 9690 6
7443 170591 6
7444 55681 6
7445 5976 6
7446 126298 6
7447 2287 6
7448 5052 6
7449 57606 6
7450 7320 6
7451 7321 6
7452 7322 6
7453 7324 6
7454 9219 6
7455 2963 6
7456 100507050 6
7457 5091 6
7458 26532 6
7459 4995 6
7460 7932 6
7461 10609 6
7462 2519 6
7463 26276 6
7464 29899 6
7465 54344 6
7466 57496 6
7467 5339 6
7468 26471 6
7469 340543 6
7470 1015 6
7471 146330 6
7472 22870 6
7473 2815 6
7474 29123 6
7475 5365 6
7476 54538 6
7477 55379 6
7478 55605 6
7479 5607 6
7480 6416 6
7481 7011 6
7482 5831 6
7483 10234 6
7484 11055 6
7485 1022 6
7486 23424 6
7487 54480 6
7488 8445 6
7489 8569 6
7490 113115 6
7491 2189 6
7492 23126 6
7493 27031 6
7494 2838 6
7495 3214 6
7496 5031 6
7497 54821 6
7498 6526 6
7499 79057 6
7500 9453 6
7501 11065 6
7502 2199 6
7503 26130 6
7504 7328 6
7505 130106 6
7506 133418 6
7507 1716 6
7508 222698 6
7509 23076 6
7510 3684 6
7511 3708 6
7512 4686 6
7513 55012 6
7514 57333 6
7515 5982 6
7516 6710 6
7517 7326 6
751

8246 646498 4
8247 1841 4
8248 340527 4
8249 64423 4
8250 6585 4
8251 728224 4
8252 728255 4
8253 728279 4
8254 730755 4
8255 7366 4
8256 7367 4
8257 55346 4
8258 79740 4
8259 79799 4
8260 10380 4
8261 8484 4
8262 81850 4
8263 81851 4
8264 81871 4
8265 81872 4
8266 83895 4
8267 84182 4
8268 84616 4
8269 84695 4
8270 85285 4
8271 85289 4
8272 85290 4
8273 8824 4
8274 9866 4
8275 100128338 3
8276 1629 3
8277 54908 3
8278 11012 3
8279 11202 3
8280 1215 3
8281 284366 3
8282 3003 3
8283 5653 3
8284 5657 3
8285 100131244 3
8286 100287399 3
8287 100287718 3
8288 100288966 3
8289 10113 3
8290 10556 3
8291 10785 3
8292 10874 3
8293 10979 3
8294 113130 3
8295 134701 3
8296 140458 3
8297 150726 3
8298 1520 3
8299 1613 3
8300 161823 3
8301 2039 3
8302 2137 3
8303 221400 3
8304 22874 3
8305 22911 3
8306 2295 3
8307 23108 3
8308 23195 3
8309 23264 3
8310 23285 3
8311 23526 3
8312 23542 3
8313 23567 3
8314 26259 3
8315 26260 3
8316 26262 3
8317 28952 3
8318 28973 3
8319 3631 3
8320 374654 3
8321 3855

8946 10518 2
8947 10592 2
8948 130540 2
8949 1455 2
8950 1459 2
8951 159686 2
8952 1786 2
8953 200030 2
8954 23144 2
8955 254427 2
8956 255426 2
8957 26223 2
8958 26263 2
8959 286097 2
8960 29105 2
8961 29110 2
8962 30818 2
8963 3145 2
8964 3208 2
8965 3241 2
8966 3611 2
8967 3674 2
8968 389842 2
8969 4253 2
8970 4329 2
8971 441150 2
8972 4437 2
8973 4439 2
8974 468 2
8975 51213 2
8976 51250 2
8977 51335 2
8978 5292 2
8979 5616 2
8980 5660 2
8981 56905 2
8982 574040 2
8983 57459 2
8984 5900 2
8985 60488 2
8986 6712 2
8987 7125 2
8988 729708 2
8989 730211 2
8990 7444 2
8991 79096 2
8992 80333 2
8993 8317 2
8994 83461 2
8995 83988 2
8996 84923 2
8997 84960 2
8998 8557 2
8999 8698 2
9000 8996 2
9001 90011 2
9002 90141 2
9003 9020 2
9004 91289 2
9005 91464 2
9006 92092 2
9007 93323 2
9008 93380 2
9009 9612 2
9010 999 2
9011 51009 2
9012 10908 2
9013 56996 2
9014 84519 2
9015 8852 2
9016 10260 2
9017 10723 2
9018 1301 2
9019 134492 2
9020 1355 2
9021 26580 2
9022 26993 2
9023 284695 2
9024 

9719 56344 1
9720 6193 1
9721 64403 1
9722 64866 1
9723 65124 1
9724 6513 1
9725 662 1
9726 6678 1
9727 6778 1
9728 728841 1
9729 7311 1
9730 79002 1
9731 79180 1
9732 79801 1
9733 79858 1
9734 79902 1
9735 8209 1
9736 823 1
9737 828 1
9738 84253 1
9739 8449 1
9740 84634 1
9741 84893 1
9742 85014 1
9743 8550 1
9744 8737 1
9745 90070 1
9746 92565 1
9747 9262 1
9748 93408 1
9749 9891 1
9750 6536 1
9751 7434 1
9752 79901 1
9753 100506164 1
9754 10602 1
9755 10651 1
9756 10654 1
9757 118487 1
9758 126526 1
9759 1289 1
9760 1302 1
9761 1349 1
9762 165918 1
9763 1903 1
9764 1958 1
9765 23161 1
9766 23263 1
9767 255631 1
9768 25920 1
9769 27132 1
9770 2802 1
9771 29117 1
9772 2934 1
9773 29988 1
9774 345778 1
9775 4118 1
9776 4208 1
9777 4351 1
9778 51042 1
9779 51142 1
9780 51538 1
9781 51759 1
9782 54534 1
9783 55526 1
9784 56339 1
9785 57231 1
9786 57679 1
9787 5995 1
9788 64965 1
9789 6870 1
9790 6917 1
9791 80139 1
9792 80852 1
9793 80854 1
9794 83992 1
9795 84109 1
9796 8412 1
9797 8499

10428 10399 1
10429 2703 1
10430 29109 1
10431 56128 1
10432 57165 1
10433 80335 1
10434 81025 1
10435 100129982 1
10436 100131196 1
10437 100131863 1
10438 100132074 1
10439 100132163 1
10440 100132565 1
10441 100132795 1
10442 100506055 1
10443 100507739 1
10444 100526832 1
10445 100616101 1
10446 10098 1
10447 10103 1
10448 10266 1
10449 10277 1
10450 10393 1
10451 10579 1
10452 10688 1
10453 10690 1
10454 10844 1
10455 10923 1
10456 10974 1
10457 11035 1
10458 11062 1
10459 113157 1
10460 122589 1
10461 122961 1
10462 123207 1
10463 125875 1
10464 129080 1
10465 1297 1
10466 130507 1
10467 130888 1
10468 1310 1
10469 133015 1
10470 134285 1
10471 134288 1
10472 134549 1
10473 137872 1
10474 140851 1
10475 1434 1
10476 1539 1
10477 157773 1
10478 158055 1
10479 158358 1
10480 160419 1
10481 167153 1
10482 1820 1
10483 1908 1
10484 221 1
10485 221421 1
10486 2254 1
10487 22977 1
10488 23032 1
10489 23418 1
10490 246744 1
10491 2498 1
10492 27166 1
10493 27175 1
10494 27190 1
10495 27

In [93]:
# Degree ranking restricted to the names listed in `newnames`.
# Names that are not nodes of newG are skipped. Filtering the dict directly
# avoids a DataFrame round-trip that depended on .loc tolerating missing
# labels and converted the integer degrees to floats.
# assumes newnames is a 1-D array of identifiers -- TODO confirm
_predicted_degrees = dict(newG.degree())
Filtereddict = {
    name: _predicted_degrees[name]
    for name in newnames.astype(str).tolist()
    if name in _predicted_degrees
}
Degreelist = list(Filtereddict)
Sortdl = sorted(Degreelist, key=Filtereddict.get, reverse=True)
for c, i in enumerate(Sortdl):
    print(c, i, Filtereddict[i])

0 89832 3943.0
1 101929970 3415.0
2 1140 3413.0
3 1141 3413.0
4 1135 3413.0
5 3359 3405.0
6 57053 3399.0
7 8973 3396.0
8 9177 3271.0
9 6564 2817.0
10 100289279 2577.0
11 105369239 2377.0
12 2570 2113.0
13 441024 2069.0
14 10351 1417.0
15 26154 1417.0
16 79007 1372.0
17 246100 1280.0
18 63950 1278.0
19 80164 1278.0
20 10840 1211.0
21 79875 1202.0
22 100533105 1157.0
23 23620 1118.0
24 118461 1109.0
25 389333 1109.0
26 2565 1106.0
27 10394 1092.0
28 9037 1088.0
29 10350 1081.0
30 101927446 1047.0
31 8992 912.0
32 60495 899.0
33 646643 898.0
34 102800317 898.0
35 100130827 898.0
36 85320 888.0
37 223117 884.0
38 139728 883.0
39 25981 844.0
40 102724428 793.0
41 200959 788.0
42 2568 787.0
43 55879 787.0
44 92270 779.0
45 387775 768.0
46 9390 768.0
47 9389 768.0
48 146429 768.0
49 282973 762.0
50 11181 725.0
51 114336 720.0
52 114335 720.0
53 7252 720.0
54 94115 720.0
55 94027 720.0
56 3972 720.0
57 93659 720.0
58 2657 718.0
59 27202 686.0
60 55356 674.0
61 8482 670.0
62 119548 669.0
63 306

546 115350 111.0
547 8858 111.0
548 83416 111.0
549 125931 111.0
550 388551 111.0
551 80310 110.0
552 100532726 109.0
553 345895 108.0
554 222642 108.0
555 50846 108.0
556 440073 108.0
557 81492 108.0
558 157777 107.0
559 222967 107.0
560 79906 107.0
561 254956 107.0
562 84443 107.0
563 24141 106.0
564 57151 105.0
565 360200 105.0
566 119180 105.0
567 84569 105.0
568 23732 104.0
569 390110 103.0
570 126520 103.0
571 51166 103.0
572 89792 102.0
573 105376575 102.0
574 136541 102.0
575 284293 102.0
576 400668 102.0
577 25823 102.0
578 401262 102.0
579 339906 102.0
580 256394 102.0
581 23541 101.0
582 2151 101.0
583 730005 101.0
584 93035 99.0
585 203430 99.0
586 729475 99.0
587 388121 99.0
588 100861540 98.0
589 1551 98.0
590 29063 98.0
591 8277 96.0
592 201229 96.0
593 163933 96.0
594 644096 96.0
595 3902 96.0
596 153770 95.0
597 121643 95.0
598 79750 95.0
599 84518 95.0
600 440498 95.0
601 5334 95.0
602 169355 95.0
603 80831 95.0
604 147965 95.0
605 51480 95.0
606 425054 95.0
607 10192

1171 2738 46.0
1172 85508 46.0
1173 55279 46.0
1174 169834 46.0
1175 220929 46.0
1176 195828 46.0
1177 353143 46.0
1178 100533496 46.0
1179 127665 46.0
1180 57567 46.0
1181 374655 46.0
1182 7789 46.0
1183 25850 46.0
1184 353131 46.0
1185 116412 46.0
1186 353140 46.0
1187 26239 46.0
1188 353134 46.0
1189 7594 46.0
1190 22834 46.0
1191 353137 46.0
1192 353133 46.0
1193 160365 46.0
1194 128611 46.0
1195 109504726 46.0
1196 124961 46.0
1197 353135 46.0
1198 285349 46.0
1199 8302 46.0
1200 7693 46.0
1201 284346 46.0
1202 644353 45.0
1203 340267 45.0
1204 148811 45.0
1205 100507055 45.0
1206 10265 45.0
1207 79190 45.0
1208 100533467 45.0
1209 5554 45.0
1210 389792 45.0
1211 79191 45.0
1212 284525 45.0
1213 728695 45.0
1214 107984060 45.0
1215 494118 45.0
1216 494119 45.0
1217 153572 45.0
1218 389383 45.0
1219 55001 45.0
1220 5545 45.0
1221 5555 45.0
1222 494197 45.0
1223 9331 45.0
1224 161176 45.0
1225 341418 44.0
1226 390323 44.0
1227 254783 44.0
1228 121364 44.0
1229 119694 44.0
1230 40166

1771 344561 33.0
1772 548313 33.0
1773 2828 33.0
1774 29909 33.0
1775 387509 33.0
1776 60437 33.0
1777 338557 33.0
1778 5462 33.0
1779 151449 33.0
1780 5030 33.0
1781 27239 33.0
1782 53829 33.0
1783 338442 33.0
1784 54112 33.0
1785 3209 33.0
1786 53836 33.0
1787 79470 33.0
1788 2847 33.0
1789 27199 33.0
1790 353345 33.0
1791 3360 33.0
1792 10886 33.0
1793 255220 33.0
1794 9424 33.0
1795 84636 33.0
1796 6335 33.0
1797 118442 33.0
1798 59340 33.0
1799 11245 33.0
1800 29933 33.0
1801 151306 33.0
1802 8111 33.0
1803 124274 33.0
1804 165140 33.0
1805 653 33.0
1806 11250 33.0
1807 2842 33.0
1808 57121 33.0
1809 107986096 33.0
1810 729966 32.0
1811 85012 32.0
1812 6557 32.0
1813 222223 32.0
1814 6781 32.0
1815 101930059 32.0
1816 246721 32.0
1817 100288695 32.0
1818 148545 32.0
1819 100526772 32.0
1820 57535 32.0
1821 653513 32.0
1822 653149 32.0
1823 123103 32.0
1824 100288142 32.0
1825 107983955 32.0
1826 285852 31.0
1827 91937 31.0
1828 93010 31.0
1829 342865 31.0
1830 7441 31.0
1831 34251

2490 113622 12.0
2491 6556 12.0
2492 92737 12.0
2493 254263 12.0
2494 387712 12.0
2495 378950 12.0
2496 140876 12.0
2497 728130 12.0
2498 58510 12.0
2499 64211 12.0
2500 727866 12.0
2501 359787 12.0
2502 100506403 12.0
2503 107983990 12.0
2504 389072 12.0
2505 10022 12.0
2506 11172 12.0
2507 623 12.0
2508 441457 12.0
2509 6019 12.0
2510 378948 12.0
2511 6013 12.0
2512 222545 12.0
2513 129684 12.0
2514 390874 12.0
2515 378949 12.0
2516 284358 12.0
2517 378951 12.0
2518 347741 11.0
2519 101060376 11.0
2520 101060351 11.0
2521 102723859 11.0
2522 440307 11.0
2523 414189 11.0
2524 127845 11.0
2525 120376 11.0
2526 100528030 11.0
2527 101060301 11.0
2528 107080638 11.0
2529 414060 11.0
2530 102724862 11.0
2531 101060321 11.0
2532 345456 11.0
2533 222865 11.0
2534 3118 11.0
2535 83887 11.0
2536 23114 11.0
2537 92579 11.0
2538 647219 11.0
2539 136332 11.0
2540 101060389 11.0
2541 729877 11.0
2542 64409 11.0
2543 6991 11.0
2544 149465 11.0
2545 57818 11.0
2546 149297 11.0
2547 11240 11.0
2548 

In [199]:
# Seed nodes of the subgraph to export.
# Alternative: draw three random seeds from newnames:
#selected_names = np.random.choice(newnames,3,replace=False).astype(str).tolist()
selected_names = ['56138','80852','552891','6441']

# Collect every seed together with its direct neighbours in G.
# extend/append grows the list in place and accepts G.neighbors() whether it
# returns a list or an iterator.
selected_nodes = []
for name in selected_names:
    selected_nodes.extend(G.neighbors(name))
    selected_nodes.append(name)
# Optional: also include the shortest paths between each pair of seeds.
# for comb in itertools.combinations(selected_names, 2):
#     selected_nodes = selected_nodes + nx.shortest_path(G, source=comb[0], target=comb[1], weight=None)
unique_nodes = list(set(selected_nodes))
# Optional: expand by one more hop around every collected node.
# for name in unique_nodes:
#     selected_nodes = selected_nodes + G.neighbors(name) + [name]
# unique_nodes = list(set(selected_nodes))

# Subgraph of G restricted to the collected nodes.
sG = G.subgraph(unique_nodes)

# Displayed as the cell result: number of edges in the subgraph.
len(sG.edges())

31

In [200]:
# Convert the subgraph to node-link form; the following cells read and extend
# the per-node dicts in data['nodes'] (each has an 'id' key) before dumping to JSON.
data = json_graph.node_link_data(sG)

In [201]:
# Fill colour per node: white by default, orange for names listed in newnames.
# G.nodes() is used for the keys because iterating it yields node ids in every
# networkx version, whereas iterating G.degree() yields (node, degree) pairs
# from networkx 2.0 on.
colattr = dict.fromkeys(G.nodes(), "white")
# Optional: give the seed nodes their own colour.
# for i in selected_names:
#     if i in colattr:
#         colattr[i] = "#3182BD"
for i in newnames.astype(str):
    if i in colattr:
        colattr[i] = "#E6550D"

# Attach the colour to every exported node record.
for node in data['nodes']:
    node['color'] = colattr[node['id']]

In [202]:
# Annotation table indexed by its fourth column; 'symbol' and 'name' are the
# columns copied onto the exported nodes.
# NOTE(review): presumably the index column holds Entrez gene IDs -- confirm.
protnames = pd.read_table('~/work/protein_names.tsv', engine="c", index_col=3, dtype='str')
symbdict = protnames['symbol'].to_dict()
namesdict = protnames['name'].to_dict()

for node in data['nodes']:
    # The index may be parsed as numbers or, because dtype='str' is requested,
    # as strings (this differs between pandas versions). Try the integer key
    # first, then fall back to the string form of the id.
    key = int(node['id'])
    if key not in symbdict:
        key = str(node['id'])

    if key in symbdict:
        node['symb'] = symbdict[key]
        node['protname'] = namesdict[key]
    else:
        # No annotation available: store the literal placeholder and report it.
        node['symb'] = 'None'
        node['protname'] = 'None'
        print('none')
    

In [203]:
# Node radius: 5 by default, 10 for the seed nodes in selected_names.
# G.nodes() is used for the keys because iterating it yields node ids in every
# networkx version, whereas iterating G.degree() yields (node, degree) pairs
# from networkx 2.0 on.
radattr = dict.fromkeys(G.nodes(), 5)
# Radius for names listed in newnames; currently the same value as the default.
for i in newnames.astype(str):
    if i in radattr:
        radattr[i] = 5
# Seeds are assigned last so they stay at 10 even when also listed in newnames.
for i in selected_names:
    if i in radattr:
        radattr[i] = 10

# Attach the radius to every exported node record.
for node in data['nodes']:
    node['rad'] = radattr[node['id']]

In [204]:
# Write the annotated node-link structure to both output locations.
for out_path in ('../network.json', '../w/data/network.json'):
    with open(out_path, 'w') as f:
        json.dump(data, f)

In [205]:
# Displayed as the cell result: number of entries in newnames.
len(newnames)

3116

In [208]:
# Displayed as the cell result: sum of the labels after converting the strings
# back to integers.
# NOTE(review): equals the number of positive predictions if labels are only '0'/'1' -- confirm.
np.sum(labels.astype(int))

298535

In [211]:
# Give the three positional columns descriptive names for the export.
# NOTE(review): this renames df in place; the earlier cells that select df[2]
# will raise KeyError if they are re-run after this cell.
df.columns = ["Entrez ID Protein 1", "Entrez ID Protein 2", "Predicted Interaction"]

In [213]:
# Export the full prediction table; with pandas' defaults the row index is
# written as an extra, unnamed first column.
df.to_csv('../predictions.csv')